xref: /titanic_50/usr/src/cmd/sendmail/db/include/mp.h (revision 9cd928fe5e3ea4e05f64cfb380beb54b2623e7dc)
1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997, 1998
5  *	Sleepycat Software.  All rights reserved.
6  *
7  *	@(#)mp.h	10.37 (Sleepycat) 1/1/99
8  */
9 
/*
 * Incomplete declarations of the structure tags defined later in this
 * header, so they can be referenced by pointer before their definitions.
 */
struct __bh;
struct __db_mpreg;
struct __mpool;
struct __mpoolfile;

/* Short names for the structures declared above. */
typedef struct __bh		BH;
typedef struct __db_mpreg	DB_MPREG;
typedef struct __mpool		MPOOL;
typedef struct __mpoolfile	MPOOLFILE;
14 
15 					/* Default mpool name. */
#define	DB_DEFAULT_MPOOL_FILE	"__db_mpool.share"

/*
 * Cache sizes, in bytes.
 *
 * DB_CACHESIZE_DEF is the size used when the user doesn't specify one:
 * 256K, i.e. room for 32 8K pages.  It is only defined here if nothing
 * defined it earlier, so it can be overridden before this header is read.
 *
 * DB_CACHESIZE_MIN is the required minimum, 20K.
 */
#ifndef	DB_CACHESIZE_DEF
#define	DB_CACHESIZE_DEF	(32 * 8 * 1024)
#endif
#define	DB_CACHESIZE_MIN	(20 * 1024)

/* Shared memory offset value that means "no offset". */
#define	INVALID		0
28 
29 /*
30  * There are three ways we do locking in the mpool code:
31  *
32  * Locking a handle mutex to provide concurrency for DB_THREAD operations.
33  * Locking the region mutex to provide mutual exclusion while reading and
34  *    writing structures in the shared region.
35  * Locking buffer header mutexes during I/O.
36  *
37  * The first will not be further described here.  We use the shared mpool
38  * region lock to provide mutual exclusion while reading/modifying all of
39  * the data structures, including the buffer headers.  We use a per-buffer
40  * header lock to wait on buffer I/O.  The order of locking is as follows:
41  *
42  * Searching for a buffer:
43  *	Acquire the region lock.
44  *	Find the buffer header.
45  *	Increment the reference count (guarantee the buffer stays).
46  *	While the BH_LOCKED flag is set (I/O is going on) {
47  *	    Release the region lock.
48  *		Explicitly yield the processor if it's not the first pass
49  *		through this loop, otherwise, we can simply spin because
50  *		we'll be simply switching between the two locks.
51  *	    Request the buffer lock.
52  *	    The I/O will complete...
53  *	    Acquire the buffer lock.
54  *	    Release the buffer lock.
55  *	    Acquire the region lock.
56  *	}
57  *	Return the buffer.
58  *
59  * Reading/writing a buffer:
60  *	Acquire the region lock.
61  *	Find/create the buffer header.
62  *	If reading, increment the reference count (guarantee the buffer stays).
63  *	Set the BH_LOCKED flag.
64  *	Acquire the buffer lock (guaranteed not to block).
65  *	Release the region lock.
66  *	Do the I/O and/or initialize the buffer contents.
67  *	Release the buffer lock.
68  *	    At this point, the buffer lock is available, but the logical
69  *	    operation (flagged by BH_LOCKED) is not yet completed.  For
70  *	    this reason, among others, threads checking the BH_LOCKED flag
71  *	    must loop around their test.
72  *	Acquire the region lock.
73  *	Clear the BH_LOCKED flag.
74  *	Release the region lock.
75  *	Return/discard the buffer.
76  *
77  * Pointers to DB_MPOOL, MPOOL, DB_MPOOLFILE and MPOOLFILE structures are not
78  * reacquired when a region lock is reacquired because they couldn't have been
79  * closed/discarded and because they never move in memory.
80  */
/*
 * LOCKINIT --
 *	Initialize the mutex "mutexp" with __db_mutex_init(), but only when
 *	dbmp has MP_LOCKHANDLE or MP_LOCKREGION set in its flags; otherwise
 *	do nothing.  The second argument passed to __db_mutex_init() is
 *	MUTEX_LOCK_OFFSET() of the mutex relative to (dbmp)->reginfo.addr.
 *	The return value of __db_mutex_init() is discarded.
 *
 *	The body is wrapped in do { } while (0) so the macro behaves as a
 *	single statement: a bare "if" expansion would silently capture an
 *	"else" written by the caller after the invocation.
 */
#define	LOCKINIT(dbmp, mutexp) do {					\
	if (F_ISSET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION))		\
		(void)__db_mutex_init(mutexp,				\
		    MUTEX_LOCK_OFFSET((dbmp)->reginfo.addr, mutexp));	\
} while (0)
85 
/*
 * LOCKHANDLE, UNLOCKHANDLE --
 *	Acquire/release the handle mutex "mutexp" with __db_mutex_lock() /
 *	__db_mutex_unlock(), passing (dbmp)->reginfo.fd, but only when dbmp
 *	has MP_LOCKHANDLE set in its flags; otherwise do nothing.  The
 *	return value of the mutex call is discarded.
 *
 *	Each body is wrapped in do { } while (0) so the macro behaves as a
 *	single statement: a bare "if" expansion would silently capture an
 *	"else" written by the caller after the invocation.
 */
#define	LOCKHANDLE(dbmp, mutexp) do {					\
	if (F_ISSET(dbmp, MP_LOCKHANDLE))				\
		(void)__db_mutex_lock(mutexp, (dbmp)->reginfo.fd);	\
} while (0)
#define	UNLOCKHANDLE(dbmp, mutexp) do {					\
	if (F_ISSET(dbmp, MP_LOCKHANDLE))				\
		(void)__db_mutex_unlock(mutexp, (dbmp)->reginfo.fd);	\
} while (0)
92 
/*
 * LOCKREGION, UNLOCKREGION --
 *	Acquire/release the region mutex with __db_mutex_lock() /
 *	__db_mutex_unlock(), passing (dbmp)->reginfo.fd, but only when dbmp
 *	has MP_LOCKREGION set in its flags; otherwise do nothing.  The mutex
 *	is the "lock" member found by treating (dbmp)->mp as an RLAYOUT *.
 *	The return value of the mutex call is discarded.
 *
 *	Each body is wrapped in do { } while (0) so the macro behaves as a
 *	single statement: a bare "if" expansion would silently capture an
 *	"else" written by the caller after the invocation.
 */
#define	LOCKREGION(dbmp) do {						\
	if (F_ISSET(dbmp, MP_LOCKREGION))				\
		(void)__db_mutex_lock(&((RLAYOUT *)(dbmp)->mp)->lock,	\
		    (dbmp)->reginfo.fd);				\
} while (0)
#define	UNLOCKREGION(dbmp) do {						\
	if (F_ISSET(dbmp, MP_LOCKREGION))				\
		(void)__db_mutex_unlock(&((RLAYOUT *)(dbmp)->mp)->lock,	\
		    (dbmp)->reginfo.fd);				\
} while (0)
101 
/*
 * LOCKBUFFER, UNLOCKBUFFER --
 *	Acquire/release the mutex embedded in the buffer header "bhp" with
 *	__db_mutex_lock() / __db_mutex_unlock(), passing (dbmp)->reginfo.fd.
 *	Note the test is on MP_LOCKREGION, the same flag that gates the
 *	region lock: buffer mutexes are only used when dbmp has that flag
 *	set; otherwise the macros do nothing.  The return value of the mutex
 *	call is discarded.
 *
 *	Each body is wrapped in do { } while (0) so the macro behaves as a
 *	single statement: a bare "if" expansion would silently capture an
 *	"else" written by the caller after the invocation.
 */
#define	LOCKBUFFER(dbmp, bhp) do {					\
	if (F_ISSET(dbmp, MP_LOCKREGION))				\
		(void)__db_mutex_lock(&(bhp)->mutex,			\
		    (dbmp)->reginfo.fd);				\
} while (0)
#define	UNLOCKBUFFER(dbmp, bhp) do {					\
	if (F_ISSET(dbmp, MP_LOCKREGION))				\
		(void)__db_mutex_unlock(&(bhp)->mutex,			\
		    (dbmp)->reginfo.fd);				\
} while (0)
108 
/*
 * MP_PANIC_CHECK --
 *	Check for region catastrophic shutdown.
 *
 *	If the panic field of the shared region header (dbmp->mp->rlayout)
 *	is non-zero, this macro executes "return (DB_RUNRECOVERY);" in the
 *	function that invokes it.  It can therefore only be used in functions
 *	that return a value able to carry DB_RUNRECOVERY, and anything the
 *	caller holds at the point of use is not released by the macro.
 *
 *	The macro itself takes no lock before reading the field.
 *
 *	The expansion is a brace block rather than a single statement, so
 *	"if (x) MP_PANIC_CHECK(dbmp); else ..." does not compile; put braces
 *	around the invocation in that situation.
 */
#define	MP_PANIC_CHECK(dbmp) {						\
	if ((dbmp)->mp->rlayout.panic)					\
		return (DB_RUNRECOVERY);				\
}
114 
/*
 * DB_MPOOL --
 *	Per-process memory pool structure.
 *
 *	This is the handle passed as "dbmp" to the LOCK* macros in this file:
 *	they test its flags field and use reginfo.fd, reginfo.addr and mp.
 *	(The DB_MPOOL typedef itself is not declared in this header.)
 */
struct __db_mpool {
/* These fields need to be protected for multi-threaded support. */
	db_mutex_t	*mutexp;	/* Structure lock. */

					/*
					 * List of pgin/pgout routines;
					 * entries are DB_MPREG structures
					 * linked through their q field.
					 */
	LIST_HEAD(__db_mpregh, __db_mpreg) dbregq;

					/*
					 * List of DB_MPOOLFILE's, linked
					 * through their q field.
					 */
	TAILQ_HEAD(__db_mpoolfileh, __db_mpoolfile) dbmfq;

/* These fields are not protected. */
	DB_ENV     *dbenv;		/* Reference to error information. */
	REGINFO	    reginfo;		/* Region information. */

	/*
	 * The region lock macros cast this pointer to RLAYOUT * to reach the
	 * region mutex; that relies on rlayout being the first member of
	 * MPOOL.
	 */
	MPOOL	   *mp;			/* Address of the shared MPOOL. */

	void	   *addr;		/* Address of shalloc() region. */

	DB_HASHTAB *htab;		/* Hash table of bucket headers. */

	/*
	 * Flag values for the flags field below.  LOCKHANDLE/UNLOCKHANDLE
	 * test MP_LOCKHANDLE; the region and buffer lock macros test
	 * MP_LOCKREGION; LOCKINIT tests either.
	 */
#define	MP_LOCKHANDLE	0x01		/* Threaded, lock handles and region. */
#define	MP_LOCKREGION	0x02		/* Concurrent access, lock region. */
	u_int32_t  flags;
};
143 
/*
 * DB_MPREG --
 *	DB_MPOOL registry of pgin/pgout functions.
 *
 *	One entry per registered file type; entries are kept on the dbregq
 *	list of the DB_MPOOL structure.
 */
struct __db_mpreg {
	LIST_ENTRY(__db_mpreg) q;	/* Linked list. */

	int ftype;			/* File type. */
					/*
					 * Pgin, pgout routines.  Both take
					 * (db_pgno_t, void *, DBT *) and
					 * return int.
					 * NOTE(review): presumably the page
					 * number, the page buffer and the
					 * pgcookie -- confirm against the
					 * registration function.
					 */
	int (DB_CALLBACK *pgin) __P((db_pgno_t, void *, DBT *));
	int (DB_CALLBACK *pgout) __P((db_pgno_t, void *, DBT *));
};
156 
/*
 * DB_MPOOLFILE --
 *	Per-process DB_MPOOLFILE information.
 *
 *	Each structure points back at its DB_MPOOL (dbmp) and at the shared
 *	MPOOLFILE it is layered on (mfp), and is linked onto the DB_MPOOL's
 *	dbmfq list through the q field.
 */
struct __db_mpoolfile {
/* These fields need to be protected for multi-threaded support. */
	db_mutex_t	*mutexp;	/* Structure lock. */

	int	   fd;			/* Underlying file descriptor. */

	u_int32_t ref;			/* Reference count. */

	/*
	 * !!!
	 * This field is a special case -- it's protected by the region lock
	 * NOT the thread lock.  The reason for this is that we always have
	 * the region lock immediately before or after we modify the field,
	 * and we don't want to use the structure lock to protect it because
	 * then I/O (which is done with the structure lock held because of
	 * the race between the seek and write of the file descriptor) will
	 * block any other put/get calls using this DB_MPOOLFILE structure.
	 */
	u_int32_t pinref;		/* Pinned block reference count. */

/* These fields are not protected. */
	TAILQ_ENTRY(__db_mpoolfile) q;	/* Linked list of DB_MPOOLFILE's. */

	DB_MPOOL  *dbmp;		/* Overlying DB_MPOOL. */
	MPOOLFILE *mfp;			/* Underlying MPOOLFILE. */

	void	  *addr;		/* Address of mmap'd region. */
	size_t	   len;			/* Length of mmap'd region. */

/* These fields need to be protected for multi-threaded support. */
	/* Flag values for the flags field below. */
#define	MP_READONLY	0x01		/* File is readonly. */
#define	MP_UPGRADE	0x02		/* File descriptor is readwrite. */
#define	MP_UPGRADE_FAIL	0x04		/* Upgrade wasn't possible. */
	u_int32_t  flags;
};
196 
/*
 * MPOOL --
 *	Shared memory pool region.  One of these is allocated in shared
 *	memory, and describes the pool.
 *
 *	Because the structure lives in shared memory, lists are SH_TAILQ
 *	heads and the hash table is recorded as an offset (size_t), not as
 *	a pointer.
 */
struct __mpool {
	/*
	 * This must remain the first member: the region lock macros cast an
	 * MPOOL * to RLAYOUT * to find the region mutex, and MP_PANIC_CHECK
	 * reads rlayout.panic.
	 */
	RLAYOUT	    rlayout;		/* General region information. */

	/*
	 * NOTE(review): the "buckets" on the two lists below are presumably
	 * buffer headers (BH has an "LRU queue" entry, q) -- confirm.
	 */
	SH_TAILQ_HEAD(__bhq) bhq;	/* LRU list of buckets. */
	SH_TAILQ_HEAD(__bhfq) bhfq;	/* Free buckets. */
	SH_TAILQ_HEAD(__mpfq) mpfq;	/* List of MPOOLFILEs. */

	/*
	 * We make the assumption that the early pages of the file are far
	 * more likely to be retrieved than the later pages, which means
	 * that the top bits are more interesting for hashing since they're
	 * less likely to collide.  On the other hand, since 512 4K pages
	 * represents a 2MB file, only the bottom 9 bits of the page number
	 * are likely to be set.  We XOR in the offset in the MPOOL of the
	 * MPOOLFILE that backs this particular page, since that should also
	 * be unique for the page.
	 *
	 * BUCKET evaluates to (pgno XOR (mf_offset << 9)) modulo
	 * (mp)->htab_buckets; htab_buckets must be non-zero when it is used.
	 * Each argument is evaluated exactly once.
	 */
#define	BUCKET(mp, mf_offset, pgno)					\
	(((pgno) ^ ((mf_offset) << 9)) % (mp)->htab_buckets)

	size_t	    htab;		/* Hash table offset. */
	size_t	    htab_buckets;	/* Number of hash table entries. */

	DB_LSN	    lsn;		/* Maximum checkpoint LSN. */
	u_int32_t   lsn_cnt;		/* Checkpoint buffers left to write. */

	DB_MPOOL_STAT stat;		/* Global mpool statistics. */

	/* Flag value for the flags field below. */
#define	MP_LSN_RETRY	0x01		/* Retry all BH_WRITE buffers. */
	u_int32_t  flags;
};
233 
/*
 * MPOOLFILE --
 *	Shared DB_MPOOLFILE information.
 *
 *	This is the structure a per-process DB_MPOOLFILE refers to through
 *	its mfp field.  Entries are linked on the MPOOL's mpfq list through
 *	the q field.
 *
 *	NOTE(review): the *_off members are presumably offsets into the
 *	shared region rather than pointers, since this structure is shared
 *	between processes -- confirm against the allocation code.
 */
struct __mpoolfile {
	SH_TAILQ_ENTRY  q;		/* List of MPOOLFILEs */

	u_int32_t ref;			/* Reference count. */

	int	  ftype;		/* File type. */

	int32_t	  lsn_off;		/* Page's LSN offset. */
	u_int32_t clear_len;		/* Bytes to clear on page create. */

	size_t	  path_off;		/* File name location. */
	size_t	  fileid_off;		/* File identification location. */

	size_t	  pgcookie_len;		/* Pgin/pgout cookie length. */
	size_t	  pgcookie_off;		/* Pgin/pgout cookie location. */

	u_int32_t lsn_cnt;		/* Checkpoint buffers left to write. */

	db_pgno_t last_pgno;		/* Last page in the file. */
	db_pgno_t orig_last_pgno;	/* Original last page in the file. */

	/* Flag values for the flags field below. */
#define	MP_CAN_MMAP	0x01		/* If the file can be mmap'd. */
#define	MP_TEMP		0x02		/* Backing file is a temporary. */
	u_int32_t  flags;

	DB_MPOOL_FSTAT stat;		/* Per-file mpool statistics. */
};
265 
/*
 * BH --
 *	Buffer header.
 *
 *	One header precedes each page held in the pool; the page data starts
 *	at the buf member at the end of the structure.
 */
struct __bh {
	/*
	 * Per-buffer mutex taken by LOCKBUFFER/UNLOCKBUFFER; it is used to
	 * wait on buffer I/O.
	 */
	db_mutex_t	mutex;		/* Structure lock. */

	u_int16_t	ref;		/* Reference count. */

	/*
	 * Flag values for the flags field below.  The field is only 16 bits
	 * wide, so any new flag must fit in 0xffff.
	 */
#define	BH_CALLPGIN	0x001		/* Page needs to be reworked... */
#define	BH_DIRTY	0x002		/* Page was modified. */
#define	BH_DISCARD	0x004		/* Page is useless. */
#define	BH_LOCKED	0x008		/* Page is locked (I/O in progress). */
#define	BH_TRASH	0x010		/* Page is garbage. */
#define	BH_WRITE	0x020		/* Page scheduled for writing. */
	u_int16_t  flags;

	SH_TAILQ_ENTRY	q;		/* LRU queue. */
	SH_TAILQ_ENTRY	hq;		/* MPOOL hash bucket queue. */

	db_pgno_t pgno;			/* Underlying MPOOLFILE page number. */
	size_t	  mf_offset;		/* Associated MPOOLFILE offset. */

	/*
	 * !!!
	 * This array must be size_t aligned -- the DB access methods put PAGE
	 * and other structures into it, and expect to be able to access them
	 * directly.  (We guarantee size_t alignment in the db_mpool(3) manual
	 * page as well.)
	 *
	 * The array directly follows a size_t member, which is what gives it
	 * that alignment; do not add a smaller member between mf_offset and
	 * buf.
	 *
	 * The array is declared with one element but holds variable length
	 * data, so sizeof(BH) includes one byte of buf.
	 * NOTE(review): allocations are presumably sized as the header plus
	 * the page size -- confirm against the allocation code before
	 * changing this declaration.
	 */
	u_int8_t   buf[1];		/* Variable length data. */
};
298 
299 #include "mp_ext.h"
300