xref: /titanic_52/usr/src/uts/common/avs/ns/sdbc/sd_bcache.h (revision 3270659f55e0928d6edec3d26217cc29398a8149)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #ifndef _SD_BCACHE_H
27 #define	_SD_BCACHE_H
28 
29 #ifdef __cplusplus
30 extern "C" {
31 #endif
32 
33 #ifdef DS_DDICT
34 #include <sys/nsctl/contract.h>
35 #endif
36 #include <sys/nsctl/nsctl.h>
37 #include <sys/nsctl/sdbc_ioctl.h>
38 #include <sys/nsctl/sd_hash.h>
39 #include <sys/nsctl/sd_cache.h>
40 #include <sys/nsctl/sd_conf.h>
41 #include <sys/nsctl/safestore.h>
42 
43 /*
44  * Definitions for kstats
45  */
46 #define	SDBC_KSTAT_CLASS	"storedge"
47 #define	SDBC_KSTAT_MODULE	"sdbc"
48 
49 #ifdef DEBUG
50 #define	SDBC_KSTAT_DYNMEM	"dynmem"
51 #endif
52 
53 #define	SDBC_KSTAT_CDNAME	"cdname"
54 #define	SDBC_KSTAT_CDSTATS	"cd"
55 #define	SDBC_KSTAT_GSTATS	"global"
56 #define	SDBC_KSTAT_STATS	"sdbcstats"
57 #define	SDBC_IOKSTAT_GSTATS	"gsdbc"
58 #define	SDBC_IOKSTAT_CDSTATS	"sdbc"
59 
60 /* Global kstat field names */
61 #define	SDBC_GKSTAT_COUNT	"sdbc_count"
62 #define	SDBC_GKSTAT_LOC_COUNT	"sdbc_loc_count"
63 #define	SDBC_GKSTAT_RDHITS	"sdbc_rdhits"
64 #define	SDBC_GKSTAT_RDMISS	"sdbc_rdmiss"
65 #define	SDBC_GKSTAT_WRHITS	"sdbc_wrhits"
66 #define	SDBC_GKSTAT_WRMISS	"sdbc_wrmiss"
67 #define	SDBC_GKSTAT_BLKSIZE	"sdbc_blksize"
68 #define	SDBC_GKSTAT_LRU_BLOCKS	"sdbc_lru_blocks"
69 
70 #ifdef DEBUG
71 #define	SDBC_GKSTAT_LRU_NOREQ	"sdbc_lru_noreq"
72 #define	SDBC_GKSTAT_LRU_REQ	"sdbc_lru_req"
73 #endif
74 
75 #define	SDBC_GKSTAT_WLRU_INQ	"sdbc_wlru_inq"
76 #define	SDBC_GKSTAT_CACHESIZE	"sdbc_cachesize"
77 #define	SDBC_GKSTAT_NUMBLOCKS	"sdbc_numblocks"
78 #define	SDBC_GKSTAT_NUM_SHARED	"sdbc_num_shared"
79 #define	SDBC_GKSTAT_WRCANCELNS	"sdbc_wrcancelns"
80 #define	SDBC_GKSTAT_DESTAGED	"sdbc_destaged"
81 #define	SDBC_GKSTAT_NODEHINTS	"sdbc_nodehints"
82 
83 /* per-cache descriptor kstats field names */
84 #define	SDBC_CDKSTAT_VOL_NAME	"sdbc_vol_name"
85 #define	SDBC_CDKSTAT_FAILED	"sdbc_failed"
86 #define	SDBC_CDKSTAT_CD		"sdbc_cd"
87 #define	SDBC_CDKSTAT_CACHE_READ	"sdbc_cache_read"
88 #define	SDBC_CDKSTAT_CACHE_WRITE	"sdbc_cache_write"
89 #define	SDBC_CDKSTAT_DISK_READ	"sdbc_disk_read"
90 #define	SDBC_CDKSTAT_DISK_WRITE	"sdbc_disk_write"
91 #define	SDBC_CDKSTAT_FILESIZE	"sdbc_filesize"
92 #define	SDBC_CDKSTAT_NUMDIRTY	"sdbc_numdirty"
93 #define	SDBC_CDKSTAT_NUMIO	"sdbc_numio"
94 #define	SDBC_CDKSTAT_NUMFAIL	"sdbc_numfail"
95 #define	SDBC_CDKSTAT_DESTAGED	"sdbc_destaged"
96 #define	SDBC_CDKSTAT_WRCANCELNS	"sdbc_wrcancelns"
97 #define	SDBC_CDKSTAT_CDHINTS	"sdbc_cdhints"
98 
99 #ifdef DEBUG
100 /* dynmem kstats field names */
101 #define	SDBC_DMKSTAT_MONITOR_DYNMEM	"sdbc_monitor_dynmem"
102 #define	SDBC_DMKSTAT_MAX_DYN_LIST	"sdbc_max_dyn_list"
103 #define	SDBC_DMKSTAT_CACHE_AGING_CT1	"sdbc_cache_aging_ct1"
104 #define	SDBC_DMKSTAT_CACHE_AGING_CT2	"sdbc_cache_aging_ct2"
105 #define	SDBC_DMKSTAT_CACHE_AGING_CT3	"sdbc_cache_aging_ct3"
106 #define	SDBC_DMKSTAT_CACHE_AGING_SEC1	"sdbc_cache_aging_sec1"
107 #define	SDBC_DMKSTAT_CACHE_AGING_SEC2	"sdbc_cache_aging_sec2"
108 #define	SDBC_DMKSTAT_CACHE_AGING_SEC3	"sdbc_cache_aging_sec3"
109 #define	SDBC_DMKSTAT_CACHE_AGING_PCNT1	"sdbc_cache_aging_pcnt1"
110 #define	SDBC_DMKSTAT_CACHE_AGING_PCNT2	"sdbc_cache_aging_pcnt2"
111 #define	SDBC_DMKSTAT_MAX_HOLDS_PCNT	"sdbc_max_holds_pcnt"
112 #define	SDBC_DMKSTAT_ALLOC_CNT		"sdbc_alloc_cnt"
113 #define	SDBC_DMKSTAT_DEALLOC_CNT	"sdbc_dealloc_cnt"
114 #define	SDBC_DMKSTAT_HISTORY		"sdbc_history"
115 #define	SDBC_DMKSTAT_NODATAS		"sdbc_nodatas"
116 #define	SDBC_DMKSTAT_CANDIDATES		"sdbc_candidates"
117 #define	SDBC_DMKSTAT_DEALLOCS		"sdbc_deallocs"
118 #define	SDBC_DMKSTAT_HOSTS		"sdbc_hosts"
119 #define	SDBC_DMKSTAT_PESTS		"sdbc_pests"
120 #define	SDBC_DMKSTAT_METAS		"sdbc_metas"
121 #define	SDBC_DMKSTAT_HOLDS		"sdbc_holds"
122 #define	SDBC_DMKSTAT_OTHERS		"sdbc_others"
123 #define	SDBC_DMKSTAT_NOTAVAIL		"sdbc_notavail"
124 #define	SDBC_DMKSTAT_PROCESS_DIRECTIVE	"sdbc_process_directive"
125 #define	SDBC_DMKSTAT_SIMPLECT		"sdbc_simplect"
126 
127 #endif
128 
129 /* ... values are in range [0-BLK_FBAS] */
130 typedef uint32_t sdbc_cblk_fba_t; /* FBA len or offset in cache block */
131 
132 typedef	unsigned char *ucaddr_t; /* unsigned char pointer */
133 
134 /*
135  * Atomic exchange function
136  */
137 
138 #ifdef _KERNEL
139 
140 /*
141  * Note: ldstub sets all bits in the memory byte.
142  * so far this is compatible with the usage of xmem_bu() whereby
143  * the values of ptr are either 0 or 1, and the xmem_bu() is used
144  * to set the byte to 1.
145  */
/*
 * xmem_bu(val, ptr) expands to nsc_ldstub() on the byte addressed by ptr.
 * The val argument is not used by the expansion.  ptr is parenthesized
 * before the cast so that an expression argument (e.g. base + off) is
 * cast as a whole rather than only its first operand.
 *
 * atomic_swap is an alias for xmem_bu; sd_serialize is an alias for
 * nsc_membar_stld.
 */
#define	xmem_bu(val, ptr)	nsc_ldstub((uint8_t *)(ptr))
#define	atomic_swap		xmem_bu
#define	sd_serialize		nsc_membar_stld
149 
150 #endif /* _KERNEL */
151 
152 #if defined(_KERNEL) || defined(_KMEMUSER)
153 
154 #if defined(_SD_8K_BLKSIZE)
155 typedef unsigned short	_sd_bitmap_t;
156 #else
157 typedef unsigned char	_sd_bitmap_t;
158 #endif
159 
160 /*
161  * CCTL flag types
162  */
163 
164 /*
165  * Note: CC_INUSE and CC_PAGEIO are dummy flags that are used in
166  * individual flags bytes (cc_inuse and cc_pageio) NOT cc_flag.
167  * Thus they can take any convenient value, however, they must be
168  * distinct and non-zero.
169  */
170 #define	CC_INUSE 	0x01	/* Cache entry is in use */
171 #define	CC_PAGEIO 	0x02	/* Pagelist IO is active for cache entry */
172 
173 /*
174  * Real cc_flag values.
175  */
176 #define	CC_PEND_DIRTY	0x02    /* The entry needs to be reprocessed for io */
177 #define	CC_PINNED	0x04	/* The entry has data that is "pinned" */
178 #define	CC_PINNABLE	0x08	/* Issue pin if write fails */
179 #define	CC_QHEAD	0x10	/* NSC_NOCACHE: requeue at head */
180 
181 /* specify the size of _sd_cctl[] array */
182 #define	_SD_CCTL_GROUPS 32
183 
184 /*
185  * Individual SDBC cache block entry
186  *	"cc_lock" must be held when changing dirty/valid bits.
187  *	"cc_inuse" (optimistic) atomic exchange replaces check/set of
188  *	  CC_INUSE bit in cc_flag; special handling of rare collisions.
189  *	"cc_pageio" flusher / client locking of pagelist io operations,
190  *	  atomic exchange - needs machine ld/st protection.
191  *	"cc_iostatus" is set by flusher without holding cc_lock,
192  *	  writer will set CC_PEND_DIRTY if cc_iostatus is set.
193  * Thus "cc_inuse", "cc_iostatus" and "cc_pageio" are volatile.
194  *
195  * The cc_await_* values are in the main _sd_cctl to avoid over
196  * signalling _cc_blkcv.
197  *
198  * The _sd_cctl structure is aligned to group related members and
199  * to ensure good packing.
200  */
201 
202 typedef struct _sd_cctl_sync {
203 	kcondvar_t	_cc_blkcv;	/* Synchronisation var to block on */
204 	kmutex_t	_cc_lock;	/* Cache entry spinlock		*/
205 } _sd_cctl_sync_t;
206 
/* Generic address structure: holds the virtual address of the data. */
typedef struct sd_addr_s {
	unsigned char	*sa_virt;	/* virtual address of data */
} sd_addr_t;
210 
211 /*
212  * See notes above.
213  */
214 
215 typedef struct _sd_cctl {
216 	_sd_hash_hd_t cc_head;		/* hash information - must be first */
217 	struct _sd_cctl *cc_next, *cc_prev; /* next and prev in a chain */
218 	struct _sd_cctl *cc_chain;	/* chaining request centries */
219 	struct _sd_cctl *cc_dirty_next; /* for chaining sequential writes */
220 	struct _sd_cctl *cc_dirty_link; /* for chaining the dirty lists   */
221 	struct _sd_cctl *cc_dirty_net_next; /* for chaining net writes */
222 	struct _sd_cctl *cc_dirty_net_link; /* for chaining net lists   */
223 	uint_t		cc_seq;		/* sequence number: for lru optim */
224 	volatile int	net_iostatus;	/* net status of io 	*/
225 	volatile _sd_bitmap_t net_dirty; /* net cache block dirty mask */
226 	_sd_bitmap_t	cc_valid;	/* Cache block valid mask	   */
227 	_sd_bitmap_t	cc_toflush;	/* Cache block deferred dirty mask */
228 	volatile _sd_bitmap_t cc_dirty;	/* Cache block dirty mask	   */
229 	volatile ushort_t cc_await_use;	/* # waiting for this entry (inuse) */
230 	volatile ushort_t cc_await_page; /* # waiting for this entry (pageio) */
231 	volatile uchar_t cc_inuse;	/* atomic_swap(CC_INUSE, cc_inuse) */
232 	volatile uchar_t cc_pageio;	/* atomic_swap(CC_PAGEIO, cc_pageio) */
233 	uchar_t		cc_flag;	/* flag */
234 	char		cc_iocount;	/* number of ios in progress */
235 	volatile uchar_t cc_iostatus;	/* status of io		   */
236 	uchar_t		cc_prot;	/* Segmented LRU protection flag   */
237 	sd_addr_t	cc_addr;	/* Data address information	   */
238 	ss_centry_info_t  *cc_write;	/* mirrored writes control block */
239 	struct _sd_cctl_sync *cc_sync;	/* Cache block synchronisation blk */
240 
241 	/* support for backend i/o memory coalescing */
242 	sd_addr_t	cc_anon_addr;	/* address for backend mem coalescing */
243 	int		cc_anon_len;	/* length of anon mem */
244 
245 	clock_t		cc_creat;
246 	int		cc_hits;
247 
248 	/* dynamic memory support fields */
249 	uint_t			cc_aging_dm;		/* For bit settings */
250 							/* see defines */
251 	int			cc_alloc_size_dm;	/* mem allocation */
252 							/* size bytes */
253 	struct _sd_cctl	*cc_head_dm;			/* ptr to host centry */
254 							/* for a host/pest */
255 							/* chain */
256 	struct _sd_cctl	*cc_next_dm;			/* ptr to next centry */
257 							/* in host/pest chain */
258 	struct _sd_cctl	*cc_link_list_dm;		/* simple link list */
259 							/* ptr of all centrys */
260 	/* dynmem chains */
261 	/* _sd_queue_t	*cc_dmchain_q;	dmqueue */
262 	int		cc_cblocks;	/* number of centrys for size_dm */
263 
264 	/* debugging stats */
265 	int			cc_alloc_ct_dm;
266 	int			cc_dealloc_ct_dm;
267 
268 } _sd_cctl_t;
269 
270 /* cache entry allocation tokens */
/*
 * Opaque cache entry allocation token handed to callers.
 * Both words must be initialized to 0 before use.
 */
typedef struct sdbc_allocbuf_s {
	intptr_t	opaque[2];
} sdbc_allocbuf_t;
274 
275 typedef struct sdbc_allocbuf_impl_s {
276 	_sd_cctl_t *sab_dmchain;
277 	int sab_q; /* dmqueue of last chain allocated */
278 	int reserved;  /* stats ? */
279 } sdbc_allocbuf_impl_t;
280 
281 /*
282  * bits for flag argument to sdbc_centry_alloc() and callees.
283  */
284 #define	ALLOC_LOCKED		0x1	/* locked status of sdbc_queue_lock */
285 #define	ALLOC_NOWAIT		0x2	/* do not block, return NULL */
286 
287 /*
288  * definitions supporting the dynmem dealloc thread
289  */
290 #define	LOW_RESOURCES_DM		-1
291 
292 #define	NO_THREAD_DM			-1
293 #define	PROCESS_CACHE_DM		0
294 #define	CACHE_SHUTDOWN_DM		1
295 #define	CACHE_THREAD_TERMINATED_DM	2
296 #define	TIME_DELAY_LVL0			3
297 #define	TIME_DELAY_LVL1			4
298 #define	TIME_DELAY_LVL2			5
299 #define	HISTORY_LVL0			(ushort_t)0
300 #define	HISTORY_LVL1			(ushort_t)0x00ff
301 #define	HISTORY_LVL2			(ushort_t)0xff00
302 /*
303  * definitions supporting the additional fields in the cache
304  * entry structure for dyn mem
305  */
306 #define	FIRST_AGING_DM		0x00000001
307 #define	FINAL_AGING_DM		0x000000ff
308 #define	FOUND_IN_HASH_DM	0x00000100	/* used to bring cent info */
309 						/* out of sd_centry_alloc() */
310 #define	FOUND_HOLD_OVER_DM	0x00000200	/* used to bring cent info */
311 						/* out of sd_centry_alloc() */
312 #define	HOST_ENTRY_DM		0x00000400
313 #define	PARASITIC_ENTRY_DM	0x00000800
314 #define	STICKY_METADATA_DM	0x00001000
315 #define	CATAGORY_ENTRY_DM	(HOST_ENTRY_DM|PARASITIC_ENTRY_DM| \
316 				    STICKY_METADATA_DM)
317 #define	ELIGIBLE_ENTRY_DM	0x00002000
318 #define	HASH_ENTRY_DM		0x00008000
319 #define	HOLD_ENTRY_DM		0x00010000
320 #define	ENTRY_FIELD_DM		(ELIGIBLE_ENTRY_DM|HASH_ENTRY_DM|HOLD_ENTRY_DM)
321 #define	AVAIL_ENTRY_DM		0x00020000
322 
323 /* info only */
324 #define	PREFETCH_BUF_I		0x00040000	/* implicit read-ahead */
325 #define	PREFETCH_BUF_E		0x00080000	/* explicit read-ahead */
326 #define	PREFETCH_BUF_IR		0x00100000	/* release when read complete */
327 
328 /* error processing */
329 #define	BAD_ENTRY_DM		0x20000000 /* inconsistent ccent */
330 #define	BAD_CHAIN_DM		0x40000000 /* chain containing bad ccent */
331 
332 /*
333  * definitions supporting the dynmem monitoring
334  */
335 #define	RPT_SHUTDOWN_PROCESS_DM	0x00000001
336 #define	RPT_DEALLOC_STATS1_DM	0x00000002	/* nodat,cand,host,pest,meta, */
337 						/* other,dealloc */
338 #define	RPT_DEALLOC_STATS2_DM	0x00000004 /* hysterisis,grossct */
339 /*
340  * definitions supporting the processing directive bit flags
341  */
342 #define	WAKE_DEALLOC_THREAD_DM		0x00000001	/* one shot - acted */
343 							/* on then cleared */
344 #define	MAX_OUT_ACCEL_HIST_FLAG_DM	0x00000002	/* one shot - acted */
345 							/* on then cleared */
346 /*
347  * Default - Max - Min definitions
348  */
349 #define	MAX_DYN_LIST_DEFAULT		8
350 #define	MONITOR_DYNMEM_PROCESS_DEFAULT	0
351 #define	CACHE_AGING_CT_DEFAULT		3
352 #define	CACHE_AGING_SEC1_DEFAULT	10
353 #define	CACHE_AGING_SEC2_DEFAULT	5
354 #define	CACHE_AGING_SEC3_DEFAULT	1
355 #define	CACHE_AGING_PCNT1_DEFAULT	50
356 #define	CACHE_AGING_PCNT2_DEFAULT	25
357 #define	MAX_HOLDS_PCNT_DEFAULT		0
358 #define	PROCESS_DIRECTIVE_DEFAULT	0
359 
360 #define	CACHE_AGING_CT_MAX	FINAL_AGING_DM	/* 255 */
361 #define	CACHE_AGING_SEC1_MAX	255	/* arbitrary but easy to remember */
362 #define	CACHE_AGING_SEC2_MAX	255	/* arbitrary but easy to remember */
363 #define	CACHE_AGING_SEC3_MAX	255	/* arbitrary but easy to remember */
364 #define	CACHE_AGING_PCNT1_MAX	100
365 #define	CACHE_AGING_PCNT2_MAX	100
366 #define	MAX_HOLDS_PCNT_MAX	100
367 /*
368  * dynmem global structure defn
369  */
370 typedef struct _dm_process_vars {
371 	kcondvar_t	thread_dm_cv;
372 	kmutex_t	thread_dm_lock;
373 	int	sd_dealloc_flagx; 	/* gen'l purpose bit flag */
374 	int	monitor_dynmem_process; /* bit flag indicating what to report */
375 	int	max_dyn_list;		/* max num of pages to allow list to */
376 					/* grow */
377 	/* cache aging parameter set */
378 	int	cache_aging_ct1;	/* hosts/pests - aging hits which */
379 					/* trigger dealloc */
380 	int	cache_aging_ct2;	/* metas - aging hits which */
381 					/* trigger dealloc not yet imple */
382 	int	cache_aging_ct3;	/* holds - aging hits which */
383 					/* trigger dealloc */
384 	int	cache_aging_sec1;	/* sleep time between cache list */
385 					/* exam - 100% to pcnt1 free */
386 	int	cache_aging_sec2;	/* sleep time between cache list */
387 					/* exam - pcnt1 to pcnt2 free */
388 	int	cache_aging_sec3;	/* sleep time between cache list */
389 					/* exam - pcnt2 to 0% free */
390 	int	cache_aging_pcnt1;	/* % free when to kick in accel */
391 					/* aging - sec2 */
392 	int	cache_aging_pcnt2;	/* % free when to kick in accel */
393 					/* aging - sec3 */
394 	int	max_holds_pcnt;		/* max % of cents to act as holdovers */
395 	/* stats - debug */
396 	int	alloc_ct;		/* gross count */
397 	int	dealloc_ct;		/* gross count */
398 	/* thread stats - debug and on the fly tuning of dealloc vars */
399 	int	history;		/* history flag */
400 	int	nodatas;		/* # cctls w/o data assigned */
401 	int	notavail;		/* # cctls w/data but in use */
402 	int	candidates;		/* # cand. for dealloc checking */
403 	int	deallocs;		/* # deallocs */
404 	int	hosts;			/* # hosts */
405 	int	pests;			/* # pests */
406 	int	metas;			/* # metas - sticky meata data */
407 	int	holds;			/* # holdovers - single page, fully */
408 					/* aged but not dealloc'd or hash */
409 					/* del'd */
410 	int	others;			/* # everybody else */
411 	int	process_directive;	/* processing directive bitmap flag */
412 	/* standard stats (no prefetch tallies here) */
413 	int	read_hits;		/* found in cache memory */
414 	int	read_misses;		/* not found in cache memory */
415 	int	write_hits;		/* found in cache memory */
416 	int	write_misses;		/* not found in cache memory */
417 	int	write_thru;		/* not bothering to put in cache mem */
418 	/*
419 	 * prefetch tracked by _sd_prefetch_valid_cnt and _sd_prefetch_busy_cnt
420 	 * might want different usage ?
421 	 */
422 	int	prefetch_hits;
423 	int	prefetch_misses;
424 } _dm_process_vars_t;
425 
426 /*
427  * dynmem interface
428  */
429 int sdbc_edit_xfer_process_vars_dm(_dm_process_vars_t *process_vars);
430 
431 /*
432  * Defines to hide the sd_addr_t structure
433  */
434 
435 #define	cc_data		cc_addr.sa_virt
436 
437 
438 /*
439  * Defines to hide the synchronisation block
440  */
441 
442 #define	cc_blkcv	cc_sync->_cc_blkcv
443 #define	cc_lock		cc_sync->_cc_lock
444 
445 /*
446  * This struct exists solely so that sd_info is able to
447  * extract this kind of data from sdbc without passing out
448  * the entire _sd_cctl_t which has lots of pointers which
449  * makes it impossible to deal with in 32bit program and an
450  * LP64 kernel.
451  */
452 
453 typedef struct {
454 	int		ci_write;	/* 0 == no wrt data */
455 	_sd_bitmap_t	ci_dirty;	/* dirty bits */
456 	_sd_bitmap_t	ci_valid;	/* valid bits */
457 	int		ci_cd;		/* the cd */
458 	nsc_off_t	ci_dblk;	/* the disk block number */
459 } sdbc_info_t;
460 
461 typedef struct _sd_wr_cctl {
462 	ss_resource_t wc_res;
463 	ss_centry_info_t wc_centry_info;
464 } _sd_wr_cctl_t;
465 
466 typedef struct _sd_queue {
467 	struct _sd_cctl sq_qhead;	/* LRU queue head */
468 	kmutex_t   sq_qlock;		/* LRU spinlock	  */
469 	char	   sq_await;		/* number blocked on lru sema */
470 	int	   sq_inq;		/* Number of LRU entries in q */
471 	unsigned int sq_seq;		/* sequence number for lru optim */
472 	unsigned int sq_req_stat;
473 	unsigned int sq_noreq_stat;
474 
475 	/* dmchain support */
476 	int	sq_dmchain_cblocks;	/* dmchain len in ccents */
477 } _sd_queue_t;
478 
479 
480 
481 /*
482  * The net structure contains which memory net has been configured for
483  * cache, the amount of space allocated, the write control and fault
484  * tolerant blocks etc
485  */
486 
487 typedef struct _sd_net {
488 	unsigned short	sn_psize;	/* Page size of memory in this net */
489 	unsigned char	sn_configured;	/* is this network configured */
490 	size_t	sn_csize;		/* Cache size in bytes */
491 	uint_t	sn_wsize;		/* Write size in bytes */
492 	int 	sn_cpages;		/* number of pages for Cache	  */
493 }_sd_net_t;
494 
495 #endif /* _KERNEL || _KMEMUSER */
496 
497 
498 /*
499  * Shared structure shared between cds and statistics
500  *
501  * NOTE - this structure is visible as an ioctl result.
502  * If anything changes here _sd_get_stats() and convert_stats()
503  * will need to be changed.
504  */
505 typedef struct _sd_shared {
506 	nsc_size_t sh_filesize;		/* Filesize  (in FBAs) */
507 	volatile uchar_t sh_alloc;	/* Is this allocated? */
508 	volatile uchar_t sh_failed;	/* Disk failure status (0 == ok, */
509 					/* 1 == i/o error, 2 == open failed ) */
510 	unsigned short sh_cd;		/* the cache descriptor. (for stats) */
511 	int sh_cache_read;		/* Number of FBAs read from cache */
512 	int sh_cache_write;		/* Number of FBAs written  to cache */
513 	int sh_disk_read;		/* Number of FBAs read from disk */
514 	int sh_disk_write;		/* Number of FBAs written  to disk */
515 	volatile int sh_numdirty;	/* Number of dirty blocks */
516 	volatile int sh_numio;		/* Number of blocks on way to disk */
517 	volatile int sh_numfail;	/* Number of blocks failed */
518 	int sh_flushloop;		/* Loops delayed so far */
519 	int sh_flag;			/* Flags visible to user programs    */
520 	int sh_destaged;		/* number of bytes destaged to disk */
521 	int sh_wrcancelns;		/* number of writes to dirty blocks */
522 	char sh_filename[NSC_MAXPATH];
523 } _sd_shared_t;
524 
525 
526 #if defined(_KERNEL) || defined(_KMEMUSER)
527 
528 /*
529  * Cache descriptor information.
530  */
531 typedef struct _sd_cd_info {
532 	int cd_desc;			/* The cache descriptor		*/
533 	int cd_flag;			/* Flag				*/
534 	nsc_fd_t *cd_rawfd;		/* File descriptor for raw device */
535 	strategy_fn_t cd_strategy;	/* Cached copy of strategy func */
536 	dev_t cd_crdev;			/* The device this represents	*/
537 	nsc_iodev_t *cd_iodev;		/* I/O device for callbacks	*/
538 	kmutex_t cd_lock; 		/* spinlock guarding this cd	*/
539 	volatile uchar_t  cd_writer;	/* Disk writer status		*/
540 	unsigned int  cd_hint;		/* Hints for this descriptor	*/
541 	ss_voldata_t *cd_global;  /* RM information for this cd   */
542 	struct _sd_cctl *cd_dirty_head, *cd_dirty_tail;	/* dirty chain	*/
543 	struct _sd_cctl *cd_last_ent;	/* last entry in dirty chain, for */
544 	int cd_lastchain;		/* sequential optimization	*/
545 	struct _sd_cctl *cd_lastchain_ptr; /* last sequential chain	*/
546 	struct _sd_cctl *cd_io_head, *cd_io_tail; /* io in progress q	*/
547 	struct _sd_cctl *cd_fail_head;
548 	struct _sd_shared *cd_info;	/* shared info (filename, size)  */
549 	char cd_failover;		/* done nsc_reserve during failover */
550 	volatile char cd_recovering;    /* cd is being recovered failover or */
551 					/* disk_online */
552 	char cd_write_inprogress;
553 	struct sd_net_hnd *net_hnd;
554 } _sd_cd_info_t;
555 
556 typedef struct _sd_buf_hlist {
557 	_sd_buf_handle_t hl_top;
558 	kmutex_t hl_lock;
559 	short   hl_count;
560 } _sd_buf_hlist_t;
561 
562 #endif /* _KERNEL || _KMEMUSER */
563 
564 /*
565  * Index into the following st_mem_sizes[] array
566  */
#define	_SD_LOCAL_MEM	0x00	/* type of memory to allocate */
#define	_SD_CACHE_MEM	0x01
#define	_SD_IOBUF_MEM	0x02
#define	_SD_HASH_MEM	0x03
#define	_SD_GLOBAL_MEM	0x04
#define	_SD_STATS_MEM	0x05
/*
 * Number of memory types (one past the last index).  The expansion is
 * parenthesized so it keeps its value inside larger expressions such as
 * _SD_MAX_MEM * n.
 */
#define	_SD_MAX_MEM	(_SD_STATS_MEM + 1)
574 
575 /* maintain stat struct layout */
576 #define	NUM_WQ_PAD 4
577 /*
578  * cache statistics structure
579  *
580  * NOTE - if anything changes here _sd_get_stats() and convert_stats()
581  * must be changed and _sd_stats32_t must also be synchronized.
582  *
583  */
584 typedef struct _sd_stats {
585 	int net_dirty;
586 	int net_pending;
587 	int net_free;
588 	int st_count;			/* number of opens for device	*/
589 	int st_loc_count;		/* number of open devices	*/
590 	int st_rdhits;			/* number of read hits		*/
591 	int st_rdmiss;			/* number of read misses	*/
592 	int st_wrhits;			/* number of write hits		*/
593 	int st_wrmiss;			/* number of write misses	*/
594 	int st_blksize;			/* cache block size (in bytes)	*/
595 	uint_t st_lru_blocks;
596 	uint_t st_lru_noreq;
597 	uint_t st_lru_req;
598 	int st_wlru_inq;		/* number of write blocks	*/
599 	int st_cachesize;		/* cache size (in bytes)	*/
600 	int st_numblocks;		/* # of cache blocks		*/
601 	int st_wrcancelns;		/* # of write cancellations	*/
602 	int st_destaged;		/* # of bytes destaged to disk	*/
603 	_sd_shared_t st_shared[1];	/* shared structures		*/
604 } _sd_stats_t;
605 
606 typedef struct _sd_stats_32 {
607 	int net_dirty;
608 	int net_pending;
609 	int net_free;
610 	int st_count;			/* number of opens for device	*/
611 	int st_loc_count;		/* number of open devices	*/
612 	int st_rdhits;			/* number of read hits		*/
613 	int st_rdmiss;			/* number of read misses	*/
614 	int st_wrhits;			/* number of write hits		*/
615 	int st_wrmiss;			/* number of write misses	*/
616 	int st_blksize;			/* cache block size (in bytes)	*/
617 	uint_t st_lru_blocks;
618 	uint_t st_lru_noreq;
619 	uint_t st_lru_req;
620 	int st_wlru_inq;		/* number of write blocks	*/
621 	int st_cachesize;		/* cache size (in bytes)	*/
622 	int st_numblocks;		/* # of cache blocks		*/
623 	int st_wrcancelns;		/* # of write cancellations	*/
624 	int st_destaged;		/* # of bytes destaged to disk	*/
625 	_sd_shared_t st_shared[1];	/* shared structures		*/
626 } _sd_stats32_t;
627 
628 
629 #if defined(_KERNEL) || defined(_KMEMUSER)
630 
631 /*
632  * The map structure contains mapping between a mask and relevant information
633  * that would take some computation at runtime.
634  * Given a mask, what is the first LSB set (stpos)
635  * Given a mask, what are the consecutive number of LSB bits set (len)
636  * Given a mask, what would be a new mask if the consecutive LSB bits are reset
637  * Given a mask, how many ios would be needed to flush this block.
638  * Given a mask, how many buffer descriptor lists (bdls) would be needed
639  *	on a read.
640  */
641 
642 typedef struct _sd_map_info {
643 	unsigned char mi_stpos;		/* position of first LSB set	*/
644 	unsigned char mi_len;		/* Length of consecutive LSB set */
645 	unsigned char mi_dirty_count;	/* number of fragmented bits	*/
646 	unsigned char mi_io_count;	/* number of bdls for a given mask */
647 	_sd_bitmap_t  mi_mask;		/* new mask with cons. LSB's reset */
648 } _sd_map_info_t;
649 
650 
651 /*
652  * cc_inuse is set with atomic exchange instruction
653  * when clearing, must check for waiters.
654  * sd_serialize prohibits speculative reads
655  */
/*
 * CENTRY_INUSE	   - current value of the cc_inuse byte.
 * SET_CENTRY_INUSE   - non-zero when the entry was already in use; the
 *			atomic_swap is attempted only when cc_inuse reads 0.
 * CLEAR_CENTRY_INUSE - clear cc_inuse, call sd_serialize(), then
 *			broadcast on cc_blkcv under cc_lock if cc_await_use
 *			is non-zero.  The order of these steps matters.
 */
#define	CENTRY_INUSE(centry)	((centry)->cc_inuse)
#define	SET_CENTRY_INUSE(centry) \
	(CENTRY_INUSE(centry) || \
	    atomic_swap(CC_INUSE, &((centry)->cc_inuse)))
#define	CLEAR_CENTRY_INUSE(centry) { \
	CENTRY_INUSE(centry) = 0; \
	sd_serialize(); \
	if ((centry)->cc_await_use != 0) { \
		mutex_enter(&((centry)->cc_lock)); \
		cv_broadcast(&((centry)->cc_blkcv)); \
		mutex_exit(&((centry)->cc_lock)); \
	} \
}
668 
669 
670 /*
671  * cc_pageio is set with atomic exchange instruction
672  * when clearing, must check for waiters.
673  * sd_serialize prohibits speculative reads
674  */
/*
 * CENTRY_PAGEIO	- current value of the cc_pageio byte.
 * SET_CENTRY_PAGEIO	- non-zero when pagelist io was already active; the
 *			  atomic_swap is attempted only when cc_pageio is 0.
 * WAIT_CENTRY_PAGEIO	- loop until SET_CENTRY_PAGEIO succeeds, bumping
 *			  stat and calling _sd_cc_wait() on each failure.
 * CLEAR_CENTRY_PAGEIO	- clear cc_pageio, call sd_serialize(), then
 *			  broadcast on cc_blkcv under cc_lock if
 *			  cc_await_page is non-zero.
 */
#define	CENTRY_PAGEIO(centry)	((centry)->cc_pageio)
#define	SET_CENTRY_PAGEIO(centry) \
	(CENTRY_PAGEIO(centry) || \
	    atomic_swap(CC_PAGEIO, &((centry)->cc_pageio)))
#define	WAIT_CENTRY_PAGEIO(centry, stat) { \
	while (SET_CENTRY_PAGEIO(centry)) { \
		(stat)++; \
		_sd_cc_wait(CENTRY_CD(centry), CENTRY_BLK(centry), \
		    centry, CC_PAGEIO); \
	} \
}
#define	CLEAR_CENTRY_PAGEIO(centry) { \
	CENTRY_PAGEIO(centry) = 0; \
	sd_serialize(); \
	if ((centry)->cc_await_page != 0) { \
		mutex_enter(&((centry)->cc_lock)); \
		cv_broadcast(&((centry)->cc_blkcv)); \
		mutex_exit(&((centry)->cc_lock)); \
	} \
}
694 
695 
696 #define	CENTRY_DIRTY_PENDING(centry)	((centry)->cc_flag & CC_PEND_DIRTY)
697 #define	CENTRY_PINNED(centry)	((centry)->cc_flag & CC_PINNED)
698 #define	CENTRY_PINNABLE(centry)	((centry)->cc_flag & CC_PINNABLE)
699 #define	CENTRY_QHEAD(centry)	((centry)->cc_flag & CC_QHEAD)
700 
701 #define	CENTRY_DIRTY(centry)	((centry)->cc_dirty)
702 #define	CENTRY_CD(centry)	((centry)->cc_head.hh_cd)
703 #define	CENTRY_BLK(centry)	((centry)->cc_head.hh_blk_num)
704 #define	CENTRY_IO_INPROGRESS(centry)	((centry)->cc_iostatus)
705 
706 #define	HANDLE_CD(handle)		((handle)->bh_cd)
707 
708 #endif /* _KERNEL || _KMEMUSER */
709 
710 #if defined(_KERNEL)
711 
/*
 * Copy the entry's cache descriptor and block number into its cc_write
 * control block (sc_cd and sc_fpos).  The comma expression is wrapped in
 * parentheses so the macro expands to one primary expression and cannot
 * be split by neighbouring operators.  centry is evaluated more than
 * once.
 */
#define	CENTRY_SET_FTPOS(centry) \
	((centry)->cc_write->sc_cd = CENTRY_CD(centry), \
	(centry)->cc_write->sc_fpos = CENTRY_BLK(centry))
715 
/*
 * Non-zero when centry's hash header names descriptor cd and block blk.
 * cd and blk are parenthesized so expression arguments compare as a
 * whole.
 */
#define	CC_CD_BLK_MATCH(cd, blk, centry)  \
	(((centry)->cc_head.hh_cd == (cd)) && \
	((centry)->cc_head.hh_blk_num == (blk)))
719 
720 
721 #define	_SD_ZEROADDR	((ucaddr_t)(_sd_net_config.sn_zeroaddr))
722 
723 
/*
 * Warn and return EIO from the enclosing function when len exceeds
 * _SD_MAX_FBAS.  The expansion contains a return statement, so it may
 * only be used inside functions that return an errno-style int.
 * len is parenthesized so expression arguments compare as a whole.
 */
#define	ASSERT_LEN(len) \
	if ((len) > _SD_MAX_FBAS) {\
		cmn_err(CE_WARN, \
		    "!sdbc(ASSERT_LEN) fba exceeds limits. fba_len %" \
		    NSC_SZFMT ". Max %d", (len), _SD_MAX_FBAS); \
		return (EIO);    }
730 
/*
 * Warn and return EIO from the enclosing function when the request
 * [fba_num, fba_num + fba_len) extends past sh_filesize of descriptor
 * cd.  The expansion contains a return statement.  fba_num and fba_len
 * are parenthesized so expression arguments are summed as intended.
 */
#define	ASSERT_IO_SIZE(fba_num, fba_len, cd) \
	if (((fba_num) + (fba_len)) > \
	    (_sd_cache_files[(cd)].cd_info->sh_filesize)) { \
		cmn_err(CE_WARN, \
		    "!sdbc(ASSERT_IO_SIZE) io beyond end of file." \
		    " fpos %" NSC_SZFMT " len %" NSC_SZFMT " file size 0 - %" \
		    NSC_SZFMT "\n", (fba_num), (fba_len), \
		    (_sd_cache_files[(cd)].cd_info->sh_filesize)); \
		return (EIO); \
	}
741 
742 
/*
 * Warn and return EINVAL from the enclosing function unless the range
 * starting at m_fpos with length m_flen lies inside the handle's
 * [bh_fba_pos, bh_fba_pos + bh_fba_len) range.  The expansion contains
 * a return statement.
 */
#define	ASSERT_HANDLE_LIMITS(m_h1, m_fpos, m_flen) \
	if (!(((m_fpos) >= (m_h1)->bh_fba_pos) && \
	    (((m_fpos) + (m_flen)) <= \
	    ((m_h1)->bh_fba_pos + (m_h1)->bh_fba_len)))) { \
		cmn_err(CE_WARN, \
		    "!sdbc(ASSERT_HANDLE_LIMITS) operation out of bounds" \
		    " cd %x want %" NSC_SZFMT " to %" NSC_SZFMT ". Handle %" \
		    NSC_SZFMT " to %" NSC_SZFMT, HANDLE_CD(m_h1), m_fpos,\
		    m_flen, (m_h1)->bh_fba_pos, (m_h1)->bh_fba_len); \
		return (EINVAL); \
	}
754 
755 
/* Non-zero when the handle's bh_flag has NSC_HACTIVE set. */
#define	_SD_HANDLE_ACTIVE(handle)	((handle)->bh_flag & NSC_HACTIVE)

/* Hint words: per-descriptor (from _sd_cache_files[]) and node-wide. */
#define	_SD_CD_HINTS(cd)	(_sd_cache_files[(cd)].cd_hint)
#define	_SD_NODE_HINTS		(_sd_node_hint)

/*
 * Fill in a buffer handle for an operation on descriptor cd covering
 * flen FBAs starting at fpos, and OR the caller's flag, the applicable
 * hints and NSC_HACTIVE into bh_flag.  Descriptor hints are skipped when
 * cd is _CD_NOHASH.  bh_flag is OR-ed into, not overwritten.
 *
 * Every argument is parenthesized so pointer or arithmetic expressions
 * can be passed; hndl and cd are evaluated more than once, so arguments
 * with side effects must be avoided.
 */
#define	_SD_SETUP_HANDLE(hndl, cd, fpos, flen, flag) { \
		(hndl)->bh_cd = (cd); \
		(hndl)->bh_vec = (hndl)->bh_bufvec; \
		(hndl)->bh_fba_pos = (fpos); \
		(hndl)->bh_fba_len = (flen); \
		(hndl)->bh_busy_thread = nsc_threadp(); \
		if ((cd) == _CD_NOHASH) \
			(hndl)->bh_flag |= \
			    ((flag) | _SD_NODE_HINTS | NSC_HACTIVE); \
		else \
			(hndl)->bh_flag |= \
			    ((flag) | _SD_CD_HINTS(cd) | \
			    _SD_NODE_HINTS | NSC_HACTIVE); \
	}
775 
776 #define	_SD_NOT_WRTHRU(handle)  (((handle)->bh_flag & _SD_WRTHRU_MASK) == 0)
777 #define	_SD_IS_WRTHRU(handle)   ((handle)->bh_flag & _SD_WRTHRU_MASK)
778 
779 #define	FILE_OPENED(cd)	(((cd) >= 0) && ((cd) < (sdbc_max_devs)) && \
780 			(_sd_cache_files[(cd)].cd_info != NULL) && \
781 			(_sd_cache_files[(cd)].cd_info->sh_alloc \
782 			& CD_ALLOCATED))
783 
784 /*
785  * bitmap stuff
786  */
787 
/* Field accessors for the _sd_lookup_map[] row selected by mask. */
#define	SDBC_LOOKUP_STPOS(mask)		(_sd_lookup_map[(mask)].mi_stpos)
#define	SDBC_LOOKUP_LEN(mask)		(_sd_lookup_map[(mask)].mi_len)
#define	SDBC_LOOKUP_MASK(mask)		(_sd_lookup_map[(mask)].mi_mask)
#define	SDBC_LOOKUP_DTCOUNT(mask)	(_sd_lookup_map[(mask)].mi_dirty_count)
#define	SDBC_LOOKUP_IOCOUNT(mask)	(_sd_lookup_map[(mask)].mi_io_count)

/* Clear from mask (an lvalue) the bits set in its row's mi_mask. */
#define	SDBC_LOOKUP_MODIFY(mask) \
	((mask) &= ~(_sd_lookup_map[(mask)].mi_mask))

/* Contiguity of a bitmap, as recorded in _sd_contig_bmap[]. */
#define	SDBC_IS_FRAGMENTED(bmap)	(!(_sd_contig_bmap[(bmap)]))
#define	SDBC_IS_CONTIGUOUS(bmap)	(_sd_contig_bmap[(bmap)])
797 
798 #endif /* _KERNEL */
799 
800 #if defined(_KERNEL) || defined(_KMEMUSER)
801 
/*
 * Bitmap covering fba_len FBAs starting at offset fba_off within a
 * cache block: the _fba_bits[] entry for fba_len shifted left by
 * fba_off.
 */
#define	SDBC_GET_BITS(fba_off, fba_len) \
	(_fba_bits[(fba_len)] << (fba_off))

/*
 * OR that bitmap into the entry's cc_valid mask.  The whole assignment
 * is parenthesized so the macro still updates cc_valid correctly when it
 * appears inside a larger expression.
 */
#define	SDBC_SET_VALID_BITS(fba_off, fba_len, cc_entry) \
	((cc_entry)->cc_valid |= SDBC_GET_BITS(fba_off, fba_len))
807 
808 #define	SDBC_SET_DIRTY(fba_off, fba_len, cc_entry) { \
809 	_sd_bitmap_t dirty, newdb = SDBC_GET_BITS(fba_off, fba_len); \
810 	ss_centry_info_t *gl = (cc_entry)->cc_write; \
811 	(cc_entry)->cc_valid |= newdb; \
812 	dirty = ((cc_entry)->cc_dirty |= newdb);  \
813 	gl->sc_dirty = dirty; \
814 	gl->sc_flag = (int)(cc_entry)->cc_flag;	\
815 	SSOP_SETCENTRY(sdbc_safestore, gl); }
816 
817 #define	SDBC_SET_TOFLUSH(fba_off, fba_len, cc_entry) { \
818 	_sd_bitmap_t dirty, newdb = SDBC_GET_BITS(fba_off, fba_len); \
819 	ss_centry_info_t *gl = (cc_entry)->cc_write; \
820 	(cc_entry)->cc_toflush |= newdb; \
821 	(cc_entry)->cc_valid |= newdb;  \
822 	dirty = (cc_entry)->cc_toflush | (cc_entry)->cc_dirty; \
823 	gl->sc_dirty = dirty;	\
824 	SSOP_SETCENTRY(sdbc_safestore, gl); }
825 
826 #define	SDBC_VALID_BITS(fba_off, fba_len, cc_entry) \
827 	((((cc_entry)->cc_valid) & (SDBC_GET_BITS(fba_off, fba_len))) \
828 	== (SDBC_GET_BITS(fba_off, fba_len)))
829 
830 
831 #define	SDBC_DIRTY_NEIGHBORS(last, next) \
832 	((SDBC_IS_CONTIGUOUS((last)->cc_dirty)) && \
833 	(SDBC_IS_CONTIGUOUS((next)->cc_dirty)) && \
834 (((last)->cc_dirty & (1 << (BLK_FBAS - 1))) && ((next)->cc_dirty & 0x01)))
835 
836 
/*
 * Whole-block tests against BLK_FBA_BITS, the bitmap value that has every
 * FBA of a cache block set.
 */
#define	FULLY_VALID(cc_entry)	(BLK_FBA_BITS == (cc_entry)->cc_valid)
#define	SET_FULLY_VALID(cc_entry)	((cc_entry)->cc_valid = BLK_FBA_BITS)

#define	FULLY_DIRTY(cc_entry)	(BLK_FBA_BITS == (cc_entry)->cc_dirty)
842 
/*
 * _SD_BIT_ISSET: 1 if bit number `bit' is set in `bmap', else 0.
 * _SD_BMAP_ISFULL: true when `bmap' equals BLK_FBA_BITS.
 *
 * Both parameters are parenthesized so that compound arguments such as
 * (a | b) or (i + 1) are treated as a unit rather than being re-associated
 * with the operators inside the macro.
 */
#define	_SD_BIT_ISSET(bmap, bit)	(((bmap) & (1 << (bit))) ? 1 : 0)
#define	_SD_BMAP_ISFULL(bmap)		((bmap) == BLK_FBA_BITS)
845 
846 #endif /* _KERNEL || _KMEMUSER */
847 
848 #if defined(_KERNEL)
849 
850 #if !defined(_SD_NOSTATS)
/*
 * Per-device FBA counters.  Each macro adds `blks' to one field of
 * _sd_cache_stats->st_shared[cd] and does nothing when cd lies outside
 * [0, sdbc_max_devs).
 *
 * Each expands to a single do { } while (0) statement, so it can be used
 * as the body of an if/else without capturing the caller's `else'.
 * Invoke with a trailing semicolon, as before.
 */
#define	CACHE_FBA_READ(cd, blks) \
	do { \
		if (((cd) >= 0) && ((cd) < sdbc_max_devs)) \
			_sd_cache_stats->st_shared[(cd)].sh_cache_read += \
			    (blks); \
	} while (0)
#define	DISK_FBA_READ(cd, blks) \
	do { \
		if (((cd) >= 0) && ((cd) < sdbc_max_devs)) \
			_sd_cache_stats->st_shared[(cd)].sh_disk_read += \
			    (blks); \
	} while (0)
#define	CACHE_FBA_WRITE(cd, blks) \
	do { \
		if (((cd) >= 0) && ((cd) < sdbc_max_devs)) \
			_sd_cache_stats->st_shared[(cd)].sh_cache_write += \
			    (blks); \
	} while (0)
#define	DISK_FBA_WRITE(cd, blks) \
	do { \
		if (((cd) >= 0) && ((cd) < sdbc_max_devs)) \
			_sd_cache_stats->st_shared[(cd)].sh_disk_write += \
			    (blks); \
	} while (0)
/* Global hit/miss counters: each bumps one field of _sd_cache_stats. */
#define	CACHE_READ_HIT		(_sd_cache_stats->st_rdhits++)
#define	CACHE_READ_MISS		(_sd_cache_stats->st_rdmiss++)
#define	CACHE_WRITE_HIT		(_sd_cache_stats->st_wrhits++)
#define	CACHE_WRITE_MISS	(_sd_cache_stats->st_wrmiss++)
867 
/*
 * Count one write cancellation: always in the global st_wrcancelns, and
 * also in st_shared[cd].sh_wrcancelns when cd lies in [0, sdbc_max_devs).
 * The lower-bound check matches the other per-device statistics macros
 * and keeps a negative cd from indexing in front of st_shared[].
 */
#define	CACHE_WRITE_CANCELLATION(cd) {\
	if (((cd) >= 0) && ((cd) < sdbc_max_devs))\
		_sd_cache_stats->st_shared[(cd)].sh_wrcancelns++;\
	_sd_cache_stats->st_wrcancelns++;\
}
873 
/*
 * Account for `bytes' destaged: added to st_shared[cd].sh_destaged when cd
 * lies in [0, sdbc_max_devs), and unconditionally to the global
 * st_destaged.  `bytes' is evaluated once per counter updated.
 */
#define	WRITE_DESTAGED(cd, bytes) {\
	if (((cd) >= 0) && ((cd) < sdbc_max_devs)) {\
		_sd_cache_stats->st_shared[(cd)].sh_destaged += (bytes);\
	}\
	_sd_cache_stats->st_destaged += (bytes);\
}
879 
/*
 * Record one read of `bytes' in the I/O kstats: in the per-device kstat
 * when cd lies in [0, sdbc_max_devs) and sdbc_cd_io_kstats[cd] is set, and
 * in the global kstat when sdbc_global_io_kstat is set.
 */
#define	FBA_READ_IO_KSTATS(cd, bytes) {\
	if (((cd) >= 0) && ((cd) < sdbc_max_devs)) {\
		if (sdbc_cd_io_kstats[(cd)]) {\
			KSTAT_IO_PTR(sdbc_cd_io_kstats[(cd)])->reads++;\
			KSTAT_IO_PTR(sdbc_cd_io_kstats[(cd)])->nread += \
			    (bytes);\
		}\
	}\
	if (sdbc_global_io_kstat) {\
		KSTAT_IO_PTR(sdbc_global_io_kstat)->reads++;\
		KSTAT_IO_PTR(sdbc_global_io_kstat)->nread += (bytes);\
	}\
}
890 
/*
 * Record one write of `bytes' in the I/O kstats: in the per-device kstat
 * when cd lies in [0, sdbc_max_devs) and sdbc_cd_io_kstats[cd] is set, and
 * in the global kstat when sdbc_global_io_kstat is set.
 */
#define	FBA_WRITE_IO_KSTATS(cd, bytes) {\
	if (((cd) >= 0) && ((cd) < sdbc_max_devs)) {\
		if (sdbc_cd_io_kstats[(cd)]) {\
			KSTAT_IO_PTR(sdbc_cd_io_kstats[(cd)])->writes++;\
			KSTAT_IO_PTR(sdbc_cd_io_kstats[(cd)])->nwritten += \
			    (bytes);\
		}\
	}\
	if (sdbc_global_io_kstat) {\
		KSTAT_IO_PTR(sdbc_global_io_kstat)->writes++;\
		KSTAT_IO_PTR(sdbc_global_io_kstat)->nwritten += (bytes);\
	}\
}
901 
/*
 * Start the timer measuring the amount of time spent in the cache.
 *
 * Calls kstat_runq_enter() under the kstat's ks_lock: on the per-device
 * kstat when cd lies in [0, sdbc_max_devs) and both sdbc_cd_io_kstats[cd]
 * and sdbc_cd_io_kstats_mutexes are set, then on the global kstat when
 * sdbc_global_io_kstat is set.
 */
#define	KSTAT_RUNQ_ENTER(cd) {\
	if (((cd) >= 0) && ((cd) < sdbc_max_devs)) {\
		if (sdbc_cd_io_kstats[(cd)] && sdbc_cd_io_kstats_mutexes) {\
			mutex_enter(sdbc_cd_io_kstats[(cd)]->ks_lock);\
			kstat_runq_enter(\
			    KSTAT_IO_PTR(sdbc_cd_io_kstats[(cd)]));\
			mutex_exit(sdbc_cd_io_kstats[(cd)]->ks_lock);\
		}\
	}\
	if (sdbc_global_io_kstat) {\
		mutex_enter(sdbc_global_io_kstat->ks_lock);\
		kstat_runq_enter(KSTAT_IO_PTR(sdbc_global_io_kstat));\
		mutex_exit(sdbc_global_io_kstat->ks_lock);\
	}\
}
916 
/*
 * Stop the timer measuring the amount of time spent in the cache.
 *
 * Calls kstat_runq_exit() under the kstat's ks_lock: on the per-device
 * kstat when cd lies in [0, sdbc_max_devs) and both sdbc_cd_io_kstats[cd]
 * and sdbc_cd_io_kstats_mutexes are set, then on the global kstat when
 * sdbc_global_io_kstat is set.
 */
#define	KSTAT_RUNQ_EXIT(cd) {\
	if (((cd) >= 0) && ((cd) < sdbc_max_devs)) {\
		if (sdbc_cd_io_kstats[(cd)] && sdbc_cd_io_kstats_mutexes) {\
			mutex_enter(sdbc_cd_io_kstats[(cd)]->ks_lock);\
			kstat_runq_exit(\
			    KSTAT_IO_PTR(sdbc_cd_io_kstats[(cd)]));\
			mutex_exit(sdbc_cd_io_kstats[(cd)]->ks_lock);\
		}\
	}\
	if (sdbc_global_io_kstat) {\
		mutex_enter(sdbc_global_io_kstat->ks_lock);\
		kstat_runq_exit(KSTAT_IO_PTR(sdbc_global_io_kstat));\
		mutex_exit(sdbc_global_io_kstat->ks_lock);\
	}\
}
931 
932 #else
/*
 * _SD_NOSTATS build: every statistics macro expands to nothing.  The list
 * covers all macros defined by the statistics-enabled branch, including
 * the I/O kstat and run-queue timer macros, so code using them still
 * compiles when statistics are configured out.
 */
#define	CACHE_FBA_READ(cd, blks)
#define	DISK_FBA_READ(cd, blks)
#define	CACHE_FBA_WRITE(cd, blks)
#define	DISK_FBA_WRITE(cd, blks)
#define	CACHE_READ_HIT
#define	CACHE_READ_MISS
#define	CACHE_WRITE_HIT
#define	CACHE_WRITE_MISS
#define	CACHE_WRITE_CANCELLATION(cd)
#define	WRITE_DESTAGED(cd, bytes)
#define	FBA_READ_IO_KSTATS(cd, bytes)
#define	FBA_WRITE_IO_KSTATS(cd, bytes)
#define	KSTAT_RUNQ_ENTER(cd)
#define	KSTAT_RUNQ_EXIT(cd)
943 #endif
944 
945 #endif /* _KERNEL */
946 
/* Bit values for sh_alloc */

#define	CD_ALLOC_IN_PROGRESS	0x0001
#define	CD_ALLOCATED		0x0002
#define	CD_CLOSE_IN_PROGRESS	0x0010

/* Bit values for sh_flag */

#define	CD_ATTACHED		0x0001
956 
957 #ifdef _KERNEL
958 
959 typedef void (*sdbc_ea_fn_t) (blind_t, nsc_off_t, nsc_size_t, int);
960 
/*
 * If the handle has a disconnect callback registered, trace the event
 * (SDF_DISCONNECT with the handle's cd, fba_len, fba_pos and flag) and
 * invoke the callback on the handle.  Does nothing otherwise.
 *
 * Expands to a bare `if' statement: do not use it as the unbraced body of
 * an if that has an else.
 */
#define	_SD_DISCONNECT_CALLBACK(hndl)	\
	if ((hndl)->bh_disconnect_cb) { \
		SDTRACE(SDF_DISCONNECT, (hndl)->bh_cd, (hndl)->bh_fba_len, \
		    (hndl)->bh_fba_pos, (hndl)->bh_flag, 0); \
		(hndl)->bh_disconnect_cb(hndl); \
	}
/*
 * Invoke the handle's read callback, or log a warning if none is
 * registered.
 *
 * The expansion is a bare if/else that already ends in `;' -- do not use
 * it as the unbraced body of an if that has an else.
 */
#define	_SD_READ_CALLBACK(hndl)	\
	if ((hndl)->bh_read_cb) \
		(hndl)->bh_read_cb(hndl); \
	else \
		cmn_err(CE_WARN, \
		    "!sdbc(_SD_READ_CALLBACK) not registered. io lost");
/*
 * Invoke the handle's write callback, or log a warning if none is
 * registered.
 *
 * The expansion is a bare if/else that already ends in `;' -- do not use
 * it as the unbraced body of an if that has an else.
 */
#define	_SD_WRITE_CALLBACK(hndl)	\
	if ((hndl)->bh_write_cb) \
		(hndl)->bh_write_cb(hndl); \
	else \
		cmn_err(CE_WARN, \
		    "!sdbc(_SD_WRITE_CALLBACK) not registered. io lost");
977 
978 #endif /* _KERNEL */
979 
980 
#if defined(_SD_LRU_OPTIMIZE)
/*
 * Do not requeue an entry that falls within a quarter (sq_inq >> 2) of the
 * queue's current sequence number.
 */
#define	LRU_REQ_LIMIT(q)	((q)->sq_inq >> 2)

/*
 * Yields 1 when `ent' should be requeued, i.e. its cc_seq trails the
 * queue's sq_seq by more than LRU_REQ_LIMIT(q); otherwise bumps the
 * queue's sq_noreq_stat counter and yields 0.  `q' and `ent' are
 * parenthesized so expressions such as &queue may be passed.
 */
#define	_sd_lru_reinsert(q, ent) \
	((((q)->sq_seq - (ent)->cc_seq) > LRU_REQ_LIMIT(q)) ? \
	1 : (((q)->sq_noreq_stat)++, 0))
#else
/*
 * Without _SD_LRU_OPTIMIZE every entry is requeued.
 * NOTE(review): this form takes one argument while the optimized form
 * takes two -- confirm which arity the callers use.
 */
#define	_sd_lru_reinsert(ent) 1
#endif
993 
994 #if defined(_KERNEL)
/* Dirty-block count that _SD_CD_WRITER() compares sh_numdirty against. */
#define	SD_WR_NUMIO	100
/*
 * Threshold (0x10000 == 65536) that _SD_FORCE_DISCONNECT() compares
 * FBA_SIZE(len) against: disconnect when the io is larger than this.
 */
#define	SD_DCON_THRESH	0x10000
997 
998 /*
999  * These defines are the hardwired values after sd_config_param was
1000  * zapped. Ought to remove the use of these entirely ....
1001  */
1002 
/*
 * _SD_CD_WRITER: call cd_writer(cd) and yield its result when the device's
 * sh_numdirty exceeds SD_WR_NUMIO; otherwise yield 0.
 * _SD_FORCE_DISCONNECT: true when FBA_SIZE(len) exceeds SD_DCON_THRESH.
 */
#define	_SD_CD_WRITER(cd) \
	((_sd_cache_files[(cd)].cd_info->sh_numdirty > SD_WR_NUMIO) ? \
	cd_writer(cd) : 0)
#define	_SD_FORCE_DISCONNECT(len)	(FBA_SIZE(len) > SD_DCON_THRESH)
1007 
1008 /* -------------------------------- END sd_config_param defines ---------- */
1009 
/*
 * Per-device totals read from _sd_cache_stats->st_shared[cd]:
 *   _SD_CD_WBLK_USED	sh_numio + sh_numdirty
 *   _SD_CD_ALL_WRITES	sh_numio + sh_numdirty + sh_numfail
 */
#define	_SD_CD_WBLK_USED(cd) \
	(_sd_cache_stats->st_shared[(cd)].sh_numio + \
	_sd_cache_stats->st_shared[(cd)].sh_numdirty)

#define	_SD_CD_ALL_WRITES(cd) \
	(_sd_cache_stats->st_shared[(cd)].sh_numio + \
	_sd_cache_stats->st_shared[(cd)].sh_numdirty + \
	_sd_cache_stats->st_shared[(cd)].sh_numfail)
1016 
1017 
1018 
1019 /*
1020  * ncall usage
1021  */
/* ncall codes used by sdbc: consecutive offsets 0..17 from NCALL_SDBC. */
#define	SD_ENABLE		(NCALL_SDBC + 0)
#define	SD_DISABLE		(NCALL_SDBC + 1)
#define	SD_DUAL_WRITE		(NCALL_SDBC + 2)
#define	SD_DUAL_READ		(NCALL_SDBC + 3)
#define	SD_SET_CD		(NCALL_SDBC + 4)
#define	SD_GETSIZE		(NCALL_SDBC + 5)
#define	SD_DUAL_OPEN		(NCALL_SDBC + 6)
#define	SD_REMOTE_FLUSH		(NCALL_SDBC + 7)
#define	SD_SGREMOTE_FLUSH	(NCALL_SDBC + 8)
#define	SD_DISK_IO		(NCALL_SDBC + 9)
#define	SD_GET_BMAP		(NCALL_SDBC + 10)
#define	SD_CD_DISCARD		(NCALL_SDBC + 11)
#define	SD_PING			(NCALL_SDBC + 12)
#define	SD_DC_MAIN_LOOP		(NCALL_SDBC + 13)
#define	SD_DATA			(NCALL_SDBC + 14)
#define	SD_BDATA		(NCALL_SDBC + 15)
#define	SD_UPDATE		(NCALL_SDBC + 16)
#define	SD_GET_SYSID		(NCALL_SDBC + 17)
1040 
/*
 * LINTUSED(x): under lint, expands to a discarded post-increment of x
 * (x must therefore be a modifiable lvalue); in normal builds it expands
 * to nothing.
 */
#if defined(lint)
#include <sys/nsctl/nsctl.h>
#define	LINTUSED(x)	(void)(x)++
#else
#define	LINTUSED(x)
#endif
1047 
1048 
1049 extern int BLK_FBAS;
1050 extern _sd_bitmap_t BLK_FBA_BITS;
1051 extern _sd_bitmap_t _fba_bits[];
1052 extern _sd_cctl_t	*_sd_cctl[];
1053 extern _sd_cd_info_t	*_sd_cache_files;
1054 extern _sd_hash_table_t *_sd_htable;
1055 extern _sd_map_info_t _sd_lookup_map[];
1056 extern _sd_net_t	 _sd_net_config;
1057 extern _sd_queue_t _sd_lru_q;
1058 extern _sd_stats_t *_sd_cache_stats;
1059 extern char _sd_contig_bmap[];
1060 extern int CACHE_BLOCK_SIZE;
1061 extern int CBLOCKS;
1062 extern int _sd_cctl_groupsz;
1063 extern int sdbc_static_cache;
1064 extern kmutex_t _sd_cache_lock;
1065 extern nsc_def_t _sd_sdbc_def[];
1066 extern nsc_io_t *sdbc_io;
1067 extern nsc_mem_t *sdbc_iobuf_mem, *sdbc_hash_mem;
1068 extern uint_t _sd_node_hint;
1069 extern int _sd_minidsp;
1070 extern krwlock_t sdbc_queue_lock;
1071 extern safestore_ops_t *sdbc_safestore;
1072 extern ss_common_config_t safestore_config;
1073 extern ss_voldata_t *_sdbc_gl_file_info;
1074 
1075 extern int _sdbc_cache_configure(int cblocks, spcs_s_info_t kstatus);
1076 extern void _sdbc_cache_deconfigure(void);
1077 extern void _sd_requeue(_sd_cctl_t *centry);
1078 extern void _sd_requeue_head(_sd_cctl_t *centry);
1079 extern int _sd_open(char *filename, int flag);
1080 extern int _sd_open_cd(char *filename, const int cd, const int flag);
1081 extern int _sd_close(int cd);
1082 extern int _sdbc_remote_store_pinned(int cd);
1083 extern int _sdbc_io_attach_cd(blind_t xcd);
1084 extern int _sdbc_io_detach_cd(blind_t xcd);
1085 extern int _sd_get_pinned(blind_t cd);
1086 extern void _sd_cc_copy(_sd_cctl_t *cc_real, _sd_cctl_t *cc_shadow);
1087 extern _sd_buf_handle_t *_sd_allocate_buf(int cd, nsc_off_t fba_pos,
1088     nsc_size_t fba_len, int flag, int *sts);
1089 extern void _sd_cc_wait(int cd, nsc_off_t cblk, _sd_cctl_t *centry, int flag);
1090 extern int _sd_alloc_buf(blind_t xcd, nsc_off_t fba_pos, nsc_size_t fba_len,
1091     int flag, _sd_buf_handle_t **handle_p);
1092 extern int _sd_free_buf(_sd_buf_handle_t *handle);
1093 extern _sd_cctl_t *_sd_centry_alloc(int, int, int *, int, int);
1094 extern int _sd_centry_setup_dm(_sd_cctl_t *, int, int);
1095 extern void _sdbc_dealloc_deconfigure_dm(void);
1096 extern int _sdbc_dealloc_configure_dm(void);
1097 extern _sd_cctl_t *_sd_shadow_centry(_sd_cctl_t *, _sd_cctl_t *, int, int, int);
1098 extern void _sd_centry_release(_sd_cctl_t *centry);
1099 extern int _sd_alloc_write(_sd_cctl_t *centry, int *stall);
1100 extern int _sd_read(_sd_buf_handle_t *handle, nsc_off_t fba_pos,
1101     nsc_size_t fba_len, int flag);
1102 extern void _sd_read_complete(_sd_buf_handle_t *handle, nsc_off_t fba_pos,
1103     nsc_size_t fba_len, int error);
1104 extern int _sd_write(_sd_buf_handle_t *handle, nsc_off_t fba_pos,
1105     nsc_size_t fba_len, int flag);
1106 extern int _sd_zero(_sd_buf_handle_t *handle, nsc_off_t fba_pos,
1107     nsc_size_t fba_len, int flag);
1108 extern int _sd_copy(_sd_buf_handle_t *handle1, _sd_buf_handle_t *handle2,
1109     nsc_off_t fba_pos1, nsc_off_t fba_pos2, nsc_size_t fba_len);
1110 extern void _sd_enqueue_dirty(int cd, _sd_cctl_t *chain, _sd_cctl_t *cc_last,
1111     int numq);
1112 extern void _sd_enqueue_dirty_chain(int cd, _sd_cctl_t *chain_first,
1113     _sd_cctl_t *chain_last, int numq);
1114 extern int _sd_get_stats(_sd_stats_t *uptr, int convert_32);
1115 extern int _sd_set_hint(int cd, uint_t hint);
1116 extern int _sd_clear_hint(int cd, uint_t hint);
1117 extern int _sd_get_cd_hint(int cd, uint_t *hint);
1118 extern int _sd_set_node_hint(uint_t hint);
1119 extern int _sd_clear_node_hint(uint_t hint);
1120 extern int _sd_get_node_hint(uint_t *hint);
1121 extern int _sd_get_partsize(blind_t cd, nsc_size_t *ptr);
1122 extern int _sd_get_maxfbas(blind_t cd, int flag, nsc_size_t *ptr);
1123 extern int _sd_discard_pinned(blind_t cd, nsc_off_t fba_pos,
1124     nsc_size_t fba_len);
1125 extern void _sdbc_handles_unload(void);
1126 extern int _sdbc_handles_load(void);
1127 extern int _sdbc_handles_configure();
1128 extern void _sdbc_handles_deconfigure(void);
1129 extern _sd_buf_handle_t *_sd_alloc_handle(sdbc_callback_fn_t d_cb,
1130     sdbc_callback_fn_t r_cb, sdbc_callback_fn_t w_cb);
1131 extern int _sd_free_handle(_sd_buf_handle_t *handle);
1132 extern void _sd_init_contig_bmap(void);
1133 extern void _sd_init_lookup_map(void);
1134 extern int sd_get_file_info_size(void *uaddrp);
1135 extern int sd_get_file_info_data(char *uaddrp);
1136 extern int sd_get_glmul_sizes(int *uaddrp);
1137 extern int sd_get_glmul_info(char *uaddrp);
1138 extern _sd_cctl_t *sdbc_centry_alloc(int, nsc_off_t, nsc_size_t, int *,
1139 					sdbc_allocbuf_t *, int);
1140 extern _sd_cctl_t *sdbc_centry_alloc_blks(int, nsc_off_t, nsc_size_t, int);
1141 extern int _sdbc_ft_hold_io;
1142 extern kcondvar_t _sdbc_ft_hold_io_cv;
1143 extern kmutex_t _sdbc_ft_hold_io_lk;
1144 
#if defined(DEBUG)
/*
 * DEBUG-only hooks, for testing only.
 * NOTE(review): _sdbc_ioj_load() and _sdbc_ioj_unload() are declared with
 * empty, unprototyped parameter lists -- confirm against their definitions.
 */
extern int _sdbc_flush_flag;	/* inhibit flush for testing */
extern int _sdbc_clear_ioerr(int);
extern int _sdbc_inject_ioerr(int, int, int);
extern void _sdbc_ioj_set_dev(int, dev_t);
extern void _sdbc_ioj_load();
extern void _sdbc_ioj_unload();
#endif
1154 
1155 #endif /* _KERNEL */
1156 
1157 #ifdef __cplusplus
1158 }
1159 #endif
1160 
1161 #endif /* _SD_BCACHE_H */
1162