1 /*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996, 1997, 1998 5 * Sleepycat Software. All rights reserved. 6 * 7 * @(#)db_int.h 10.77 (Sleepycat) 1/3/99 8 */ 9 10 #ifndef _DB_INTERNAL_H_ 11 #define _DB_INTERNAL_H_ 12 13 #include "db.h" /* Standard DB include file. */ 14 #include "queue.h" 15 #include "shqueue.h" 16 17 /******************************************************* 18 * General purpose constants and macros. 19 *******************************************************/ 20 #define UINT16_T_MAX 0xffff /* Maximum 16 bit unsigned. */ 21 #define UINT32_T_MAX 0xffffffff /* Maximum 32 bit unsigned. */ 22 23 #define DB_MIN_PGSIZE 0x000200 /* Minimum page size. */ 24 #define DB_MAX_PGSIZE 0x010000 /* Maximum page size. */ 25 26 #define DB_MINCACHE 10 /* Minimum cached pages */ 27 28 #define MEGABYTE 1048576 29 30 /* 31 * If we are unable to determine the underlying filesystem block size, use 32 * 8K on the grounds that most OS's use less than 8K as their VM page size. 33 */ 34 #define DB_DEF_IOSIZE (8 * 1024) 35 36 /* 37 * Aligning items to particular sizes or in pages or memory. ALIGNP is a 38 * separate macro, as we've had to cast the pointer to different integral 39 * types on different architectures. 40 * 41 * We cast pointers into unsigned longs when manipulating them because C89 42 * guarantees that u_long is the largest available integral type and further, 43 * to never generate overflows. However, neither C89 or C9X requires that 44 * any integer type be large enough to hold a pointer, although C9X created 45 * the intptr_t type, which is guaranteed to hold a pointer but may or may 46 * not exist. At some point in the future, we should test for intptr_t and 47 * use it where available. 48 */ 49 #undef ALIGNTYPE 50 #define ALIGNTYPE u_long 51 #undef ALIGNP 52 #define ALIGNP(value, bound) ALIGN((ALIGNTYPE)value, bound) 53 #undef ALIGN 54 #define ALIGN(value, bound) (((value) + (bound) - 1) & ~((bound) - 1)) 55 56 /* 57 * There are several on-page structures that are declared to have a number of 58 * fields followed by a variable length array of items. The structure size 59 * without including the variable length array or the address of the first of 60 * those elements can be found using SSZ. 61 * 62 * This macro can also be used to find the offset of a structure element in a 63 * structure. This is used in various places to copy structure elements from 64 * unaligned memory references, e.g., pointers into a packed page. 65 * 66 * There are two versions because compilers object if you take the address of 67 * an array. 68 */ 69 #undef SSZ 70 #define SSZ(name, field) ((int)&(((name *)0)->field)) 71 72 #undef SSZA 73 #define SSZA(name, field) ((int)&(((name *)0)->field[0])) 74 75 /* Macros to return per-process address, offsets based on shared regions. */ 76 #define R_ADDR(base, offset) ((void *)((u_int8_t *)((base)->addr) + offset)) 77 #define R_OFFSET(base, p) ((u_int8_t *)(p) - (u_int8_t *)(base)->addr) 78 79 #define DB_DEFAULT 0x000000 /* No flag was specified. */ 80 81 /* Structure used to print flag values. */ 82 typedef struct __fn { 83 u_int32_t mask; /* Flag value. */ 84 const char *name; /* Flag name. */ 85 } FN; 86 87 /* Set, clear and test flags. */ 88 #define F_SET(p, f) (p)->flags |= (f) 89 #define F_CLR(p, f) (p)->flags &= ~(f) 90 #define F_ISSET(p, f) ((p)->flags & (f)) 91 #define LF_SET(f) (flags |= (f)) 92 #define LF_CLR(f) (flags &= ~(f)) 93 #define LF_ISSET(f) (flags & (f)) 94 95 /* 96 * Panic check: 97 * All interfaces check the panic flag, if it's set, the tree is dead. 98 */ 99 #define DB_PANIC_CHECK(dbp) { \ 100 if ((dbp)->dbenv != NULL && (dbp)->dbenv->db_panic != 0) \ 101 return (DB_RUNRECOVERY); \ 102 } 103 104 /* Display separator string. */ 105 #undef DB_LINE 106 #define DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=" 107 108 /* Unused, or not-used-yet variable. "Shut that bloody compiler up!" */ 109 #define COMPQUIET(n, v) (n) = (v) 110 111 /* 112 * Purify and similar run-time tools complain about unitialized reads/writes 113 * for structure fields whose only purpose is padding. 114 */ 115 #define UMRW(v) (v) = 0 116 117 /* 118 * Win16 needs specific syntax on callback functions. Nobody else cares. 119 */ 120 #ifndef DB_CALLBACK 121 #define DB_CALLBACK /* Nothing. */ 122 #endif 123 124 /******************************************************* 125 * Files. 126 *******************************************************/ 127 /* 128 * We use 1024 as the maximum path length. It's too hard to figure out what 129 * the real path length is, as it was traditionally stored in <sys/param.h>, 130 * and that file isn't always available. 131 */ 132 #undef MAXPATHLEN 133 #define MAXPATHLEN 1024 134 135 #define PATH_DOT "." /* Current working directory. */ 136 #define PATH_SEPARATOR "/" /* Path separator character. */ 137 138 /******************************************************* 139 * Mutex support. 140 *******************************************************/ 141 #include <sys/machlock.h> 142 typedef lock_t tsl_t; 143 144 145 /* 146 * !!! 147 * Various systems require different alignments for mutexes (the worst we've 148 * seen so far is 16-bytes on some HP architectures). The mutex (tsl_t) must 149 * be first in the db_mutex_t structure, which must itself be first in the 150 * region. This ensures the alignment is as returned by mmap(2), which should 151 * be sufficient. All other mutex users must ensure proper alignment locally. 152 */ 153 #define MUTEX_ALIGNMENT sizeof(int) 154 155 /* 156 * The offset of a mutex in memory. 157 * 158 * !!! 159 * Not an off_t, so backing file offsets MUST be less than 4Gb. See the 160 * off field of the db_mutex_t as well. 161 */ 162 #define MUTEX_LOCK_OFFSET(a, b) ((u_int32_t)((u_int8_t *)b - (u_int8_t *)a)) 163 164 typedef struct _db_mutex_t { 165 #ifdef HAVE_SPINLOCKS 166 tsl_t tsl_resource; /* Resource test and set. */ 167 #ifdef DIAGNOSTIC 168 u_int32_t pid; /* Lock holder: 0 or process pid. */ 169 #endif 170 #else 171 u_int32_t off; /* Backing file offset. */ 172 u_int32_t pid; /* Lock holder: 0 or process pid. */ 173 #endif 174 u_int32_t spins; /* Spins before block. */ 175 u_int32_t mutex_set_wait; /* Granted after wait. */ 176 u_int32_t mutex_set_nowait; /* Granted without waiting. */ 177 } db_mutex_t; 178 179 #include "mutex_ext.h" 180 181 /******************************************************* 182 * Access methods. 183 *******************************************************/ 184 /* Lock/unlock a DB thread. */ 185 #define DB_THREAD_LOCK(dbp) \ 186 if (F_ISSET(dbp, DB_AM_THREAD)) \ 187 (void)__db_mutex_lock((db_mutex_t *)(dbp)->mutexp, -1); 188 #define DB_THREAD_UNLOCK(dbp) \ 189 if (F_ISSET(dbp, DB_AM_THREAD)) \ 190 (void)__db_mutex_unlock((db_mutex_t *)(dbp)->mutexp, -1); 191 192 /******************************************************* 193 * Environment. 194 *******************************************************/ 195 /* Type passed to __db_appname(). */ 196 typedef enum { 197 DB_APP_NONE=0, /* No type (region). */ 198 DB_APP_DATA, /* Data file. */ 199 DB_APP_LOG, /* Log file. */ 200 DB_APP_TMP /* Temporary file. */ 201 } APPNAME; 202 203 /******************************************************* 204 * Shared memory regions. 205 *******************************************************/ 206 /* 207 * The shared memory regions share an initial structure so that the general 208 * region code can handle races between the region being deleted and other 209 * processes waiting on the region mutex. 210 * 211 * !!! 212 * Note, the mutex must be the first entry in the region; see comment above. 213 */ 214 typedef struct _rlayout { 215 db_mutex_t lock; /* Region mutex. */ 216 #define DB_REGIONMAGIC 0x120897 217 u_int32_t valid; /* Valid magic number. */ 218 u_int32_t refcnt; /* Region reference count. */ 219 size_t size; /* Region length. */ 220 int majver; /* Major version number. */ 221 int minver; /* Minor version number. */ 222 int patch; /* Patch version number. */ 223 int panic; /* Region is dead. */ 224 #define INVALID_SEGID -1 225 int segid; /* shmget(2) ID, or Win16 segment ID. */ 226 227 #define REGION_ANONYMOUS 0x01 /* Region is/should be in anon mem. */ 228 u_int32_t flags; 229 } RLAYOUT; 230 231 /* 232 * DB creates all regions on 4K boundaries out of sheer paranoia, so that 233 * we don't make the underlying VM unhappy. 234 */ 235 #define DB_VMPAGESIZE (4 * 1024) 236 #define DB_ROUNDOFF(n, round) { \ 237 (n) += (round) - 1; \ 238 (n) -= (n) % (round); \ 239 } 240 241 /* 242 * The interface to region attach is nasty, there is a lot of complex stuff 243 * going on, which has to be retained between create/attach and detach. The 244 * REGINFO structure keeps track of it. 245 */ 246 struct __db_reginfo; typedef struct __db_reginfo REGINFO; 247 struct __db_reginfo { 248 /* Arguments. */ 249 DB_ENV *dbenv; /* Region naming info. */ 250 APPNAME appname; /* Region naming info. */ 251 char *path; /* Region naming info. */ 252 const char *file; /* Region naming info. */ 253 int mode; /* Region mode, if a file. */ 254 size_t size; /* Region size. */ 255 u_int32_t dbflags; /* Region file open flags, if a file. */ 256 257 /* Results. */ 258 char *name; /* Region name. */ 259 void *addr; /* Region address. */ 260 int fd; /* Fcntl(2) locking file descriptor. 261 NB: this is only valid if a regular 262 file is backing the shared region, 263 and mmap(2) is being used to map it 264 into our address space. */ 265 int segid; /* shmget(2) ID, or Win16 segment ID. */ 266 void *wnt_handle; /* Win/NT HANDLE. */ 267 268 /* Shared flags. */ 269 /* 0x0001 COMMON MASK with RLAYOUT structure. */ 270 #define REGION_CANGROW 0x0002 /* Can grow. */ 271 #define REGION_CREATED 0x0004 /* Created. */ 272 #define REGION_HOLDINGSYS 0x0008 /* Holding system resources. */ 273 #define REGION_LASTDETACH 0x0010 /* Delete on last detach. */ 274 #define REGION_MALLOC 0x0020 /* Created in malloc'd memory. */ 275 #define REGION_PRIVATE 0x0040 /* Private to thread/process. */ 276 #define REGION_REMOVED 0x0080 /* Already deleted. */ 277 #define REGION_SIZEDEF 0x0100 /* Use default region size if exists. */ 278 u_int32_t flags; 279 }; 280 281 /******************************************************* 282 * Mpool. 283 *******************************************************/ 284 /* 285 * File types for DB access methods. Negative numbers are reserved to DB. 286 */ 287 #define DB_FTYPE_BTREE -1 /* Btree. */ 288 #define DB_FTYPE_HASH -2 /* Hash. */ 289 290 /* Structure used as the DB pgin/pgout pgcookie. */ 291 typedef struct __dbpginfo { 292 size_t db_pagesize; /* Underlying page size. */ 293 int needswap; /* If swapping required. */ 294 } DB_PGINFO; 295 296 /******************************************************* 297 * Log. 298 *******************************************************/ 299 /* Initialize an LSN to 'zero'. */ 300 #define ZERO_LSN(LSN) { \ 301 (LSN).file = 0; \ 302 (LSN).offset = 0; \ 303 } 304 305 /* Return 1 if LSN is a 'zero' lsn, otherwise return 0. */ 306 #define IS_ZERO_LSN(LSN) ((LSN).file == 0) 307 308 /* Test if we need to log a change. */ 309 #define DB_LOGGING(dbc) \ 310 (F_ISSET((dbc)->dbp, DB_AM_LOGGING) && !F_ISSET(dbc, DBC_RECOVER)) 311 312 #ifdef DIAGNOSTIC 313 /* 314 * Debugging macro to log operations. 315 * If DEBUG_WOP is defined, log operations that modify the database. 316 * If DEBUG_ROP is defined, log operations that read the database. 317 * 318 * D dbp 319 * T txn 320 * O operation (string) 321 * K key 322 * A data 323 * F flags 324 */ 325 #define LOG_OP(C, T, O, K, A, F) { \ 326 DB_LSN _lsn; \ 327 DBT _op; \ 328 if (DB_LOGGING((C))) { \ 329 memset(&_op, 0, sizeof(_op)); \ 330 _op.data = O; \ 331 _op.size = strlen(O) + 1; \ 332 (void)__db_debug_log((C)->dbp->dbenv->lg_info, \ 333 T, &_lsn, 0, &_op, (C)->dbp->log_fileid, K, A, F); \ 334 } \ 335 } 336 #ifdef DEBUG_ROP 337 #define DEBUG_LREAD(C, T, O, K, A, F) LOG_OP(C, T, O, K, A, F) 338 #else 339 #define DEBUG_LREAD(C, T, O, K, A, F) 340 #endif 341 #ifdef DEBUG_WOP 342 #define DEBUG_LWRITE(C, T, O, K, A, F) LOG_OP(C, T, O, K, A, F) 343 #else 344 #define DEBUG_LWRITE(C, T, O, K, A, F) 345 #endif 346 #else 347 #define DEBUG_LREAD(C, T, O, K, A, F) 348 #define DEBUG_LWRITE(C, T, O, K, A, F) 349 #endif /* DIAGNOSTIC */ 350 351 /******************************************************* 352 * Transactions and recovery. 353 *******************************************************/ 354 /* 355 * Out of band value for a lock. The locks are returned to callers as offsets 356 * into the lock regions. Since the RLAYOUT structure begins all regions, an 357 * offset of 0 is guaranteed not to be a valid lock. 358 */ 359 #define LOCK_INVALID 0 360 361 /* The structure allocated for every transaction. */ 362 struct __db_txn { 363 DB_TXNMGR *mgrp; /* Pointer to transaction manager. */ 364 DB_TXN *parent; /* Pointer to transaction's parent. */ 365 DB_LSN last_lsn; /* Lsn of last log write. */ 366 u_int32_t txnid; /* Unique transaction id. */ 367 size_t off; /* Detail structure within region. */ 368 TAILQ_ENTRY(__db_txn) links; /* Links transactions off manager. */ 369 TAILQ_HEAD(__kids, __db_txn) kids; /* Child transactions. */ 370 TAILQ_ENTRY(__db_txn) klinks; /* Links child transactions. */ 371 372 #define TXN_MALLOC 0x01 /* Structure allocated by TXN system. */ 373 u_int32_t flags; 374 }; 375 376 /******************************************************* 377 * Global variables. 378 *******************************************************/ 379 /* 380 * !!! 381 * Initialized in os/os_config.c, don't change this unless you change it 382 * as well. 383 */ 384 385 struct __rmname { 386 char *dbhome; 387 int rmid; 388 TAILQ_ENTRY(__rmname) links; 389 }; 390 391 typedef struct __db_globals { 392 int db_mutexlocks; /* DB_MUTEXLOCKS */ 393 int db_pageyield; /* DB_PAGEYIELD */ 394 int db_region_anon; /* DB_REGION_ANON, DB_REGION_NAME */ 395 int db_region_init; /* DB_REGION_INIT */ 396 int db_tsl_spins; /* DB_TSL_SPINS */ 397 /* XA: list of opened environments. */ 398 TAILQ_HEAD(__db_envq, __db_env) db_envq; 399 /* XA: list of id to dbhome mappings. */ 400 TAILQ_HEAD(__db_nameq, __rmname) db_nameq; 401 } DB_GLOBALS; 402 403 extern DB_GLOBALS __db_global_values; 404 #define DB_GLOBAL(v) __db_global_values.v 405 406 #include "os.h" 407 #include "os_ext.h" 408 409 #endif /* !_DB_INTERNAL_H_ */ 410