xref: /titanic_52/usr/src/cmd/sendmail/db/db_int.h (revision 0eb822a1c0c2bea495647510b75f77f0e57633eb)
1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997, 1998
5  *	Sleepycat Software.  All rights reserved.
6  *
7  *	@(#)db_int.h	10.77 (Sleepycat) 1/3/99
8  */
9 
10 #ifndef _DB_INTERNAL_H_
11 #define	_DB_INTERNAL_H_
12 
#include <stddef.h>			/* offsetof(), used by SSZ and SSZA. */

13 #include "db.h"				/* Standard DB include file. */
14 #include "queue.h"
15 #include "shqueue.h"
16 
17 /*******************************************************
18  * General purpose constants and macros.
19  *******************************************************/
20 #define	UINT16_T_MAX	    0xffff	/* Maximum 16 bit unsigned. */
21 #define	UINT32_T_MAX	0xffffffff	/* Maximum 32 bit unsigned. */
22 
23 #define	DB_MIN_PGSIZE	0x000200	/* Minimum page size (512 bytes). */
24 #define	DB_MAX_PGSIZE	0x010000	/* Maximum page size (64KB). */
25 
26 #define	DB_MINCACHE	10		/* Minimum cached pages. */
27 
28 #define	MEGABYTE	1048576		/* Bytes in one megabyte (2^20). */
29 
30 /*
31  * If we are unable to determine the underlying filesystem block size, use
32  * 8K on the grounds that most OS's use less than 8K as their VM page size.
33  * The value is expressed in bytes.
34  */
34 #define	DB_DEF_IOSIZE	(8 * 1024)
35 
36 /*
37  * Aligning items to particular sizes or in pages or memory.  ALIGNP is a
38  * separate macro, as we've had to cast the pointer to different integral
39  * types on different architectures.
40  *
41  * We cast pointers into unsigned longs when manipulating them because C89
42  * guarantees that u_long is the largest available integral type and,
43  * further, that unsigned arithmetic never overflows.  However, neither C89
44  * nor C9X requires that any integer type be large enough to hold a pointer,
45  * although C9X created the intptr_t type, which is guaranteed to hold a
46  * pointer but may or may not exist.  At some point in the future, we should
47  * test for intptr_t and use it where available.
48  */
/*
 * ALIGNTYPE --
 *	The integral type pointers are converted to before alignment
 *	arithmetic is done on them.
 */
#undef	ALIGNTYPE
#define	ALIGNTYPE		u_long

/*
 * ALIGNP --
 *	Round a pointer (or integral value) up to the next multiple of bound;
 *	the result has type ALIGNTYPE.  Both arguments are parenthesized and
 *	converted to ALIGNTYPE: an expression argument such as "p + 1" is
 *	cast as a whole, and a bound narrower than ALIGNTYPE can no longer
 *	produce a mask that clears the high-order bits of the address.
 */
#undef	ALIGNP
#define	ALIGNP(value, bound)						\
	ALIGN((ALIGNTYPE)(value), (ALIGNTYPE)(bound))

/*
 * ALIGN --
 *	Round an integral value up to the next multiple of bound.  The mask
 *	arithmetic only works when bound is a power of two.  The mask is
 *	computed in bound's type, so callers passing an unsigned bound that is
 *	narrower than value must widen it themselves (ALIGNP does this).
 */
#undef	ALIGN
#define	ALIGN(value, bound)	(((value) + (bound) - 1) & ~((bound) - 1))
55 
56 /*
57  * There are several on-page structures that are declared to have a number of
58  * fields followed by a variable length array of items.  The structure size
59  * without including the variable length array or the address of the first of
60  * those elements can be found using SSZ.
61  *
62  * This macro can also be used to find the offset of a structure element in a
63  * structure.  This is used in various places to copy structure elements from
64  * unaligned memory references, e.g., pointers into a packed page.
65  *
66  * There are two versions because compilers object if you take the address of
67  * an array.
68  */
/*
 * SSZ, SSZA --
 *	Byte offset of "field" within structure type "name", as an int.  SSZ
 *	is for ordinary fields, SSZA for array fields (the offset of an array
 *	is the offset of its first element).  Both use the standard offsetof()
 *	from <stddef.h> rather than taking the address of a member through a
 *	null pointer and casting that pointer to int.
 */
#undef	SSZ
#define	SSZ(name, field)	((int)offsetof(name, field))

#undef	SSZA
#define	SSZA(name, field)	((int)offsetof(name, field))
74 
75 /* Macros to return per-process address, offsets based on shared regions. */
/*
 * R_ADDR --
 *	Convert a byte offset into a per-process address (void *), relative
 *	to (base)->addr.
 * R_OFFSET --
 *	Convert a per-process address into a byte offset from (base)->addr.
 *
 * Every argument and each whole expansion is parenthesized so the macros
 * can be given expression arguments and used inside larger expressions.
 */
#define	R_ADDR(base, offset)						\
	((void *)((u_int8_t *)((base)->addr) + (offset)))
#define	R_OFFSET(base, p)						\
	((u_int8_t *)(p) - (u_int8_t *)((base)->addr))
78 
79 #define	DB_DEFAULT	0x000000	/* No flag was specified. */
80 
81 /*
 * Structure used to print flag values: pairs a flag bit mask with the
 * name to display for it.
 */
82 typedef struct __fn {
83 	u_int32_t mask;			/* Flag value. */
84 	const char *name;		/* Flag name. */
85 } FN;
86 
87 /* Set, clear and test flags. */
/*
 * F_SET, F_CLR and F_ISSET operate on the "flags" member of the structure
 * that p points to; LF_SET, LF_CLR and LF_ISSET operate on a variable named
 * "flags" in the invoking scope.  All six expand to a single parenthesized
 * expression, so they can be used safely inside larger expressions.
 */
#define	F_SET(p, f)	((p)->flags |= (f))
#define	F_CLR(p, f)	((p)->flags &= ~(f))
#define	F_ISSET(p, f)	((p)->flags & (f))
#define	LF_SET(f)	(flags |= (f))
#define	LF_CLR(f)	(flags &= ~(f))
#define	LF_ISSET(f)	(flags & (f))
94 
95 /*
96  * Panic check:
97  * All interfaces check the panic flag, if it's set, the tree is dead.
98  */
/*
 * DB_PANIC_CHECK --
 *	Return DB_RUNRECOVERY from the invoking function if dbp has an
 *	environment whose db_panic field is non-zero; otherwise do nothing.
 *	Must be used in a function returning int, and invoked as a statement
 *	with a trailing semicolon.  The do { } while (0) wrapper makes that
 *	invocation a single statement, safe in an unbraced if/else.
 */
#define	DB_PANIC_CHECK(dbp) do {					\
	if ((dbp)->dbenv != NULL && (dbp)->dbenv->db_panic != 0)	\
		return (DB_RUNRECOVERY);				\
} while (0)
103 
104 /* Display separator string. */
105 #undef	DB_LINE
106 #define	DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
107 
108 /*
 * Unused, or not-used-yet variable.  "Shut that bloody compiler up!"
 * Expands to an assignment of v to n.
 */
109 #define	COMPQUIET(n, v)	(n) = (v)
110 
111 /*
112  * Purify and similar run-time tools complain about uninitialized
113  * reads/writes for structure fields whose only purpose is padding.
114  * UMRW expands to an assignment of 0 to v.
 */
115 #define	UMRW(v)		(v) = 0
116 
117 /*
118  * Win16 needs specific syntax on callback functions.  Nobody else cares.
119  * DB_CALLBACK expands to nothing unless it was already defined before this
 * point.
 */
120 #ifndef	DB_CALLBACK
121 #define	DB_CALLBACK	/* Nothing. */
122 #endif
123 
124 /*******************************************************
125  * Files.
126  *******************************************************/
127  /*
128   * We use 1024 as the maximum path length.  It's too hard to figure out what
129   * the real path length is, as it was traditionally stored in <sys/param.h>,
130   * and that file isn't always available.
131   */
/* Any earlier definition of MAXPATHLEN is discarded in favor of 1024. */
132 #undef	MAXPATHLEN
133 #define	MAXPATHLEN	1024
134 
135 #define	PATH_DOT	"."	/* Current working directory. */
136 #define	PATH_SEPARATOR	"/"	/* Path separator, as a string. */
137 
138 /*******************************************************
139  * Mutex support.
140  *******************************************************/
/*
 * tsl_t, the type of the test-and-set resource in db_mutex_t, is an alias
 * for lock_t (presumably declared by <sys/machlock.h> -- confirm).
 */
141 #include <sys/machlock.h>
142 typedef lock_t tsl_t;
143 
144 
145 /*
146  * !!!
147  * Various systems require different alignments for mutexes (the worst we've
148  * seen so far is 16-bytes on some HP architectures).  The mutex (tsl_t) must
149  * be first in the db_mutex_t structure, which must itself be first in the
150  * region.  This ensures the alignment is as returned by mmap(2), which should
151  * be sufficient.  All other mutex users must ensure proper alignment locally.
152  *
 * MUTEX_ALIGNMENT is the alignment, in bytes, configured here: sizeof(int).
 */
153 #define	MUTEX_ALIGNMENT	sizeof(int)
154 
155 /*
156  * The offset of a mutex in memory.
157  *
158  * !!!
159  * Not an off_t, so backing file offsets MUST be less than 4Gb.  See the
160  * off field of the db_mutex_t as well.
161  */
/*
 * MUTEX_LOCK_OFFSET --
 *	Byte distance from address a to address b, as a u_int32_t.  Both
 *	arguments are parenthesized so that expression arguments (for example
 *	"base + n") are converted as a whole before the subtraction.
 */
#define	MUTEX_LOCK_OFFSET(a, b)						\
	((u_int32_t)((u_int8_t *)(b) - (u_int8_t *)(a)))
163 
/*
 * db_mutex_t --
 *	Mutex structure.  The leading fields depend on the build: with
 *	HAVE_SPINLOCKS it begins with the test-and-set resource (followed by
 *	the holder pid only when DIAGNOSTIC is also defined); without
 *	HAVE_SPINLOCKS it begins with a backing file offset and the holder
 *	pid.  The spin count and the two grant counters are always present.
 */
164 typedef struct _db_mutex_t {
165 #ifdef HAVE_SPINLOCKS
166 	tsl_t	  tsl_resource;		/* Resource test and set. */
167 #ifdef DIAGNOSTIC
168 	u_int32_t pid;			/* Lock holder: 0 or process pid. */
169 #endif
170 #else
171 	u_int32_t off;			/* Backing file offset. */
172 	u_int32_t pid;			/* Lock holder: 0 or process pid. */
173 #endif
174 	u_int32_t spins;		/* Spins before block. */
175 	u_int32_t mutex_set_wait;	/* Granted after wait. */
176 	u_int32_t mutex_set_nowait;	/* Granted without waiting. */
177 } db_mutex_t;
178 
179 #include "mutex_ext.h"
180 
181 /*******************************************************
182  * Access methods.
183  *******************************************************/
184 /* Lock/unlock a DB thread. */
/*
 * DB_THREAD_LOCK, DB_THREAD_UNLOCK --
 *	If the handle has DB_AM_THREAD set, lock (unlock) the mutex that
 *	(dbp)->mutexp points to, passing -1 as the second argument; the
 *	return value of the mutex call is discarded.  Invoke as a statement
 *	with a trailing semicolon: the do { } while (0) wrapper makes that a
 *	single statement, so a following "else" cannot bind to the macro's
 *	internal "if".
 */
#define	DB_THREAD_LOCK(dbp) do {					\
	if (F_ISSET(dbp, DB_AM_THREAD))					\
		(void)__db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1);	\
} while (0)
#define	DB_THREAD_UNLOCK(dbp) do {					\
	if (F_ISSET(dbp, DB_AM_THREAD))					\
		(void)__db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1);\
} while (0)
191 
192 /*******************************************************
193  * Environment.
194  *******************************************************/
195 /*
 * Type passed to __db_appname(); identifies the kind of file being named.
 */
196 typedef enum {
197 	DB_APP_NONE=0,			/* No type (region). */
198 	DB_APP_DATA,			/* Data file. */
199 	DB_APP_LOG,			/* Log file. */
200 	DB_APP_TMP			/* Temporary file. */
201 } APPNAME;
202 
203 /*******************************************************
204  * Shared memory regions.
205  *******************************************************/
206 /*
207  * The shared memory regions share an initial structure so that the general
208  * region code can handle races between the region being deleted and other
209  * processes waiting on the region mutex.
210  *
211  * !!!
212  * Note, the mutex must be the first entry in the region; see comment above.
213  */
/*
 * RLAYOUT --
 *	Header shared by the region structures: the region mutex, a validity
 *	magic number, a reference count, the region length, version numbers,
 *	a panic indicator, a segment ID and flags.
 */
214 typedef struct _rlayout {
215 	db_mutex_t lock;		/* Region mutex. */
216 #define	DB_REGIONMAGIC	0x120897
217 	u_int32_t  valid;		/* Valid magic number (presumably
					   DB_REGIONMAGIC -- confirm). */
218 	u_int32_t  refcnt;		/* Region reference count. */
219 	size_t	   size;		/* Region length. */
220 	int	   majver;		/* Major version number. */
221 	int	   minver;		/* Minor version number. */
222 	int	   patch;		/* Patch version number. */
223 	int	   panic;		/* Region is dead. */
224 #define	INVALID_SEGID	-1
225 	int	   segid;		/* shmget(2) ID, or Win16 segment ID. */
226 
227 #define	REGION_ANONYMOUS	0x01	/* Region is/should be in anon mem. */
228 	u_int32_t  flags;		/* REGION_ANONYMOUS is defined for
					   this field. */
229 } RLAYOUT;
230 
231 /*
232  * DB creates all regions on 4K boundaries out of sheer paranoia, so that
233  * we don't make the underlying VM unhappy.
234  */
#define	DB_VMPAGESIZE	(4 * 1024)	/* Region boundary, in bytes. */

/*
 * DB_ROUNDOFF --
 *	Round the lvalue n up, in place, to the next multiple of round (n is
 *	left unchanged when it is already a multiple).  n is evaluated more
 *	than once, so it must not have side effects.  Invoke as a statement
 *	with a trailing semicolon; the do { } while (0) wrapper makes that a
 *	single statement, safe in an unbraced if/else.
 */
#define	DB_ROUNDOFF(n, round) do {					\
	(n) += (round) - 1;						\
	(n) -= (n) % (round);						\
} while (0)
240 
241 /*
242  * The interface to region attach is nasty, there is a lot of complex stuff
243  * going on, which has to be retained between create/attach and detach.  The
244  * REGINFO structure keeps track of it.
245  */
/*
 * REGINFO --
 *	State kept between region create/attach and detach.  The first group
 *	of fields holds the arguments describing the region, the second group
 *	holds the results, and the last field holds flags.
 */
246 struct __db_reginfo;	typedef struct __db_reginfo REGINFO;
247 struct __db_reginfo {
248 					/* Arguments. */
249 	DB_ENV	   *dbenv;		/* Region naming info. */
250 	APPNAME	    appname;		/* Region naming info. */
251 	char	   *path;		/* Region naming info. */
252 	const char *file;		/* Region naming info. */
253 	int	    mode;		/* Region mode, if a file. */
254 	size_t	    size;		/* Region size. */
255 	u_int32_t   dbflags;		/* Region file open flags, if a file. */
256 
257 					/* Results. */
258 	char	   *name;		/* Region name. */
259 	void	   *addr;		/* Region address. */
260 	int	    fd;			/* Fcntl(2) locking file descriptor.
261 					   NB: this is only valid if a regular
262 					   file is backing the shared region,
263 					   and mmap(2) is being used to map it
264 					   into our address space. */
265 	int	    segid;		/* shmget(2) ID, or Win16 segment ID. */
266 	void	   *wnt_handle;		/* Win/NT HANDLE. */
267 
268 					/* Shared flags. */
269 /*				0x0001	COMMON MASK with RLAYOUT structure
					(the value of REGION_ANONYMOUS). */
270 #define	REGION_CANGROW		0x0002	/* Can grow. */
271 #define	REGION_CREATED		0x0004	/* Created. */
272 #define	REGION_HOLDINGSYS	0x0008	/* Holding system resources. */
273 #define	REGION_LASTDETACH	0x0010	/* Delete on last detach. */
274 #define	REGION_MALLOC		0x0020	/* Created in malloc'd memory. */
275 #define	REGION_PRIVATE		0x0040	/* Private to thread/process. */
276 #define	REGION_REMOVED		0x0080	/* Already deleted. */
277 #define	REGION_SIZEDEF		0x0100	/* Use default region size if exists. */
278 	u_int32_t   flags;		/* REGION_* values above. */
279 };
280 
281 /*******************************************************
282  * Mpool.
283  *******************************************************/
284 /*
285  * File types for DB access methods.  Negative numbers are reserved to DB.
286  */
287 #define	DB_FTYPE_BTREE		-1	/* Btree. */
288 #define	DB_FTYPE_HASH		-2	/* Hash. */
289 
290 /*
 * Structure used as the DB pgin/pgout pgcookie: carries the page size and
 * whether swapping is required.
 */
291 typedef struct __dbpginfo {
292 	size_t	db_pagesize;		/* Underlying page size. */
293 	int	needswap;		/* If swapping required. */
294 } DB_PGINFO;
295 
296 /*******************************************************
297  * Log.
298  *******************************************************/
299 /* Initialize an LSN to 'zero'. */
/*
 * ZERO_LSN --
 *	Set both the file and offset members of LSN (a structure, not a
 *	pointer to one) to 0.  LSN is evaluated twice.  Invoke as a statement
 *	with a trailing semicolon; the do { } while (0) wrapper makes that a
 *	single statement, safe in an unbraced if/else.
 */
#define	ZERO_LSN(LSN) do {						\
	(LSN).file = 0;							\
	(LSN).offset = 0;						\
} while (0)
304 
305 /*
 * Return 1 if LSN is a 'zero' lsn, otherwise return 0.  Only the file
 * member is examined; the offset member is not checked.
 */
306 #define	IS_ZERO_LSN(LSN)	((LSN).file == 0)
307 
308 /*
 * Test if we need to log a change: true when the cursor's DB handle has
 * DB_AM_LOGGING set and the cursor itself does not have DBC_RECOVER set.
 */
309 #define	DB_LOGGING(dbc)							\
310 	(F_ISSET((dbc)->dbp, DB_AM_LOGGING) && !F_ISSET(dbc, DBC_RECOVER))
311 
#ifdef DIAGNOSTIC
/*
 * Debugging macro to log operations.
 *	If DEBUG_WOP is defined, log operations that modify the database.
 *	If DEBUG_ROP is defined, log operations that read the database.
 *
 * C cursor (the DB handle is reached through (C)->dbp)
 * T txn
 * O operation (string)
 * K key
 * A data
 * F flags
 *
 * When DB_LOGGING(C) is true, LOG_OP passes the operation name (including
 * its terminating nul) to __db_debug_log() and discards the return value.
 * O is evaluated twice.  Invoke as a statement with a trailing semicolon;
 * the do { } while (0) wrapper makes that a single statement, safe in an
 * unbraced if/else.
 *
 * Without DIAGNOSTIC, or without the matching DEBUG_ROP/DEBUG_WOP define,
 * DEBUG_LREAD and DEBUG_LWRITE expand to nothing.
 */
#define	LOG_OP(C, T, O, K, A, F) do {					\
	DB_LSN _lsn;							\
	DBT _op;							\
	if (DB_LOGGING((C))) {						\
		memset(&_op, 0, sizeof(_op));				\
		_op.data = (O);						\
		_op.size = strlen((O)) + 1;				\
		(void)__db_debug_log((C)->dbp->dbenv->lg_info,		\
		    (T), &_lsn, 0, &_op, (C)->dbp->log_fileid,		\
		    (K), (A), (F));					\
	}								\
} while (0)
#ifdef DEBUG_ROP
#define	DEBUG_LREAD(C, T, O, K, A, F)	LOG_OP(C, T, O, K, A, F)
#else
#define	DEBUG_LREAD(C, T, O, K, A, F)
#endif
#ifdef DEBUG_WOP
#define	DEBUG_LWRITE(C, T, O, K, A, F)	LOG_OP(C, T, O, K, A, F)
#else
#define	DEBUG_LWRITE(C, T, O, K, A, F)
#endif
#else
#define	DEBUG_LREAD(C, T, O, K, A, F)
#define	DEBUG_LWRITE(C, T, O, K, A, F)
#endif /* DIAGNOSTIC */
350 
351 /*******************************************************
352  * Transactions and recovery.
353  *******************************************************/
354 /*
355  * Out of band value for a lock.  The locks are returned to callers as offsets
356  * into the lock regions.  Since the RLAYOUT structure begins all regions, an
357  * offset of 0 is guaranteed not to be a valid lock.
358  */
359 #define	LOCK_INVALID	0
360 
361 /*
 * The structure allocated for every transaction.  A transaction is linked
 * onto its manager's list through "links"; its child transactions hang off
 * "kids" and are chained to one another through "klinks".
 */
362 struct __db_txn {
363 	DB_TXNMGR	*mgrp;		/* Pointer to transaction manager. */
364 	DB_TXN		*parent;	/* Pointer to transaction's parent. */
365 	DB_LSN		last_lsn;	/* Lsn of last log write. */
366 	u_int32_t	txnid;		/* Unique transaction id. */
367 	size_t		off;		/* Detail structure within region. */
368 	TAILQ_ENTRY(__db_txn) links;	/* Links transactions off manager. */
369 	TAILQ_HEAD(__kids, __db_txn) kids; /* Child transactions. */
370 	TAILQ_ENTRY(__db_txn) klinks;	/* Links child transactions. */
371 
372 #define	TXN_MALLOC	0x01		/* Structure allocated by TXN system. */
373 	u_int32_t	flags;		/* TXN_MALLOC is defined for this
					   field. */
374 };
375 
376 /*******************************************************
377  * Global variables.
378  *******************************************************/
379 /*
380  * !!!
381  * Initialized in os/os_config.c, don't change this unless you change it
382  * as well.
383  */
384 
/*
 * Element of the db_nameq list in DB_GLOBALS, which that structure
 * describes as the XA list of id to dbhome mappings.
 */
385 struct __rmname {
386 	char *dbhome;			/* dbhome side of the mapping. */
387 	int rmid;			/* Id side of the mapping. */
388 	TAILQ_ENTRY(__rmname) links;	/* List linkage. */
389 };
390 
/*
 * DB_GLOBALS --
 *	Process-wide settings and lists, gathered into one structure.  The
 *	single instance is __db_global_values, declared here and defined
 *	elsewhere; DB_GLOBAL(v) names member v of that instance.
 */
391 typedef struct __db_globals {
392 	int db_mutexlocks;		/* DB_MUTEXLOCKS */
393 	int db_pageyield;		/* DB_PAGEYIELD */
394 	int db_region_anon;		/* DB_REGION_ANON, DB_REGION_NAME */
395 	int db_region_init;		/* DB_REGION_INIT */
396 	int db_tsl_spins;		/* DB_TSL_SPINS */
397 					/* XA: list of opened environments. */
398 	TAILQ_HEAD(__db_envq, __db_env) db_envq;
399 					/* XA: list of id to dbhome mappings. */
400 	TAILQ_HEAD(__db_nameq, __rmname) db_nameq;
401 } DB_GLOBALS;
402 
403 extern	DB_GLOBALS	__db_global_values;
404 #define	DB_GLOBAL(v)	__db_global_values.v
405 
406 #include "os.h"
407 #include "os_ext.h"
408 
409 #endif /* !_DB_INTERNAL_H_ */
410