1 /*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996, 1997, 1998 5 * Sleepycat Software. All rights reserved. 6 */ 7 #include "config.h" 8 9 #ifndef lint 10 static const char sccsid[] = "@(#)mp_region.c 10.35 (Sleepycat) 12/11/98"; 11 #endif /* not lint */ 12 13 #ifndef NO_SYSTEM_INCLUDES 14 #include <sys/types.h> 15 16 #include <errno.h> 17 #include <string.h> 18 #endif 19 20 #include "db_int.h" 21 #include "shqueue.h" 22 #include "db_shash.h" 23 #include "mp.h" 24 #include "common_ext.h" 25 26 /* 27 * __memp_reg_alloc -- 28 * Allocate some space in the mpool region, with locking. 29 * 30 * PUBLIC: int __memp_reg_alloc __P((DB_MPOOL *, size_t, size_t *, void *)); 31 */ 32 int 33 __memp_reg_alloc(dbmp, len, offsetp, retp) 34 DB_MPOOL *dbmp; 35 size_t len, *offsetp; 36 void *retp; 37 { 38 int ret; 39 40 LOCKREGION(dbmp); 41 ret = __memp_alloc(dbmp, len, offsetp, retp); 42 UNLOCKREGION(dbmp); 43 return (ret); 44 } 45 46 /* 47 * __memp_alloc -- 48 * Allocate some space in the mpool region. 49 * 50 * PUBLIC: int __memp_alloc __P((DB_MPOOL *, size_t, size_t *, void *)); 51 */ 52 int 53 __memp_alloc(dbmp, len, offsetp, retp) 54 DB_MPOOL *dbmp; 55 size_t len, *offsetp; 56 void *retp; 57 { 58 BH *bhp, *nbhp; 59 MPOOL *mp; 60 MPOOLFILE *mfp; 61 size_t fsize, total; 62 int nomore, restart, ret, wrote; 63 void *p; 64 65 mp = dbmp->mp; 66 67 nomore = 0; 68 alloc: if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) { 69 if (offsetp != NULL) 70 *offsetp = R_OFFSET(dbmp, p); 71 *(void **)retp = p; 72 return (0); 73 } 74 if (nomore) { 75 __db_err(dbmp->dbenv, 76 "Unable to allocate %lu bytes from mpool shared region: %s\n", 77 (u_long)len, strerror(ret)); 78 return (ret); 79 } 80 81 /* Look for a buffer on the free list that's the right size. */ 82 for (bhp = 83 SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) { 84 nbhp = SH_TAILQ_NEXT(bhp, q, __bh); 85 86 if (__db_shsizeof(bhp) == len) { 87 SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh); 88 if (offsetp != NULL) 89 *offsetp = R_OFFSET(dbmp, bhp); 90 *(void **)retp = bhp; 91 return (0); 92 } 93 } 94 95 /* Discard from the free list until we've freed enough memory. */ 96 total = 0; 97 for (bhp = 98 SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) { 99 nbhp = SH_TAILQ_NEXT(bhp, q, __bh); 100 101 SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh); 102 __db_shalloc_free(dbmp->addr, bhp); 103 --mp->stat.st_page_clean; 104 105 /* 106 * Retry as soon as we've freed up sufficient space. If we 107 * will have to coalesce memory to satisfy the request, don't 108 * try until it's likely (possible?) that we'll succeed. 109 */ 110 total += fsize = __db_shsizeof(bhp); 111 if (fsize >= len || total >= 3 * len) 112 goto alloc; 113 } 114 115 retry: /* Find a buffer we can flush; pure LRU. */ 116 restart = total = 0; 117 for (bhp = 118 SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) { 119 nbhp = SH_TAILQ_NEXT(bhp, q, __bh); 120 121 /* Ignore pinned or locked (I/O in progress) buffers. */ 122 if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED)) 123 continue; 124 125 /* Find the associated MPOOLFILE. */ 126 mfp = R_ADDR(dbmp, bhp->mf_offset); 127 128 /* 129 * Write the page if it's dirty. 130 * 131 * If we wrote the page, fall through and free the buffer. We 132 * don't have to rewalk the list to acquire the buffer because 133 * it was never available for any other process to modify it. 134 * If we didn't write the page, but we discarded and reacquired 135 * the region lock, restart the buffer list walk. If we neither 136 * wrote the buffer nor discarded the region lock, continue down 137 * the buffer list. 138 */ 139 if (F_ISSET(bhp, BH_DIRTY)) { 140 ++bhp->ref; 141 if ((ret = __memp_bhwrite(dbmp, 142 mfp, bhp, &restart, &wrote)) != 0) 143 return (ret); 144 --bhp->ref; 145 146 /* 147 * It's possible that another process wants this buffer 148 * and incremented the ref count while we were writing 149 * it. 150 */ 151 if (bhp->ref != 0) 152 goto retry; 153 154 if (wrote) 155 ++mp->stat.st_rw_evict; 156 else { 157 if (restart) 158 goto retry; 159 continue; 160 } 161 } else 162 ++mp->stat.st_ro_evict; 163 164 /* 165 * Check to see if the buffer is the size we're looking for. 166 * If it is, simply reuse it. 167 */ 168 total += fsize = __db_shsizeof(bhp); 169 if (fsize == len) { 170 __memp_bhfree(dbmp, mfp, bhp, 0); 171 172 if (offsetp != NULL) 173 *offsetp = R_OFFSET(dbmp, bhp); 174 *(void **)retp = bhp; 175 return (0); 176 } 177 178 /* Free the buffer. */ 179 __memp_bhfree(dbmp, mfp, bhp, 1); 180 181 /* 182 * Retry as soon as we've freed up sufficient space. If we 183 * have to coalesce of memory to satisfy the request, don't 184 * try until it's likely (possible?) that we'll succeed. 185 */ 186 if (fsize >= len || total >= 3 * len) 187 goto alloc; 188 189 /* Restart the walk if we discarded the region lock. */ 190 if (restart) 191 goto retry; 192 } 193 nomore = 1; 194 goto alloc; 195 } 196 197 /* 198 * __memp_ropen -- 199 * Attach to, and optionally create, the mpool region. 200 * 201 * PUBLIC: int __memp_ropen 202 * PUBLIC: __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t)); 203 */ 204 int 205 __memp_ropen(dbmp, path, cachesize, mode, is_private, flags) 206 DB_MPOOL *dbmp; 207 const char *path; 208 size_t cachesize; 209 int mode, is_private; 210 u_int32_t flags; 211 { 212 MPOOL *mp; 213 size_t rlen; 214 int defcache, ret; 215 216 /* 217 * Unlike other DB subsystems, mpool can't simply grow the region 218 * because it returns pointers into the region to its clients. To 219 * "grow" the region, we'd have to allocate a new region and then 220 * store a region number in the structures that reference regional 221 * objects. It's reasonable that we fail regardless, as clients 222 * shouldn't have every page in the region pinned, so the only 223 * "failure" mode should be a performance penalty because we don't 224 * find a page in the cache that we'd like to have found. 225 * 226 * Up the user's cachesize by 25% to account for our overhead. 227 */ 228 defcache = 0; 229 if (cachesize < DB_CACHESIZE_MIN) 230 if (cachesize == 0) { 231 defcache = 1; 232 cachesize = DB_CACHESIZE_DEF; 233 } else 234 cachesize = DB_CACHESIZE_MIN; 235 rlen = cachesize + cachesize / 4; 236 237 /* 238 * Map in the region. 239 * 240 * If it's a private mpool, use malloc, it's a lot faster than 241 * instantiating a region. 242 */ 243 dbmp->reginfo.dbenv = dbmp->dbenv; 244 dbmp->reginfo.appname = DB_APP_NONE; 245 if (path == NULL) 246 dbmp->reginfo.path = NULL; 247 else 248 if ((ret = __os_strdup(path, &dbmp->reginfo.path)) != 0) 249 return (ret); 250 dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE; 251 dbmp->reginfo.mode = mode; 252 dbmp->reginfo.size = rlen; 253 dbmp->reginfo.dbflags = flags; 254 dbmp->reginfo.flags = 0; 255 if (defcache) 256 F_SET(&dbmp->reginfo, REGION_SIZEDEF); 257 258 /* 259 * If we're creating a temporary region, don't use any standard 260 * naming. 261 */ 262 if (is_private) { 263 dbmp->reginfo.appname = DB_APP_TMP; 264 dbmp->reginfo.file = NULL; 265 F_SET(&dbmp->reginfo, REGION_PRIVATE); 266 } 267 268 if ((ret = __db_rattach(&dbmp->reginfo)) != 0) { 269 if (dbmp->reginfo.path != NULL) 270 __os_freestr(dbmp->reginfo.path); 271 return (ret); 272 } 273 274 /* 275 * The MPOOL structure is first in the region, the rest of the region 276 * is free space. 277 */ 278 dbmp->mp = dbmp->reginfo.addr; 279 dbmp->addr = (u_int8_t *)dbmp->mp + sizeof(MPOOL); 280 281 /* Initialize a created region. */ 282 if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) { 283 mp = dbmp->mp; 284 SH_TAILQ_INIT(&mp->bhq); 285 SH_TAILQ_INIT(&mp->bhfq); 286 SH_TAILQ_INIT(&mp->mpfq); 287 288 __db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL)); 289 290 /* 291 * Assume we want to keep the hash chains with under 10 pages 292 * on each chain. We don't know the pagesize in advance, and 293 * it may differ for different files. Use a pagesize of 1K for 294 * the calculation -- we walk these chains a lot, they should 295 * be short. 296 */ 297 mp->htab_buckets = 298 __db_tablesize((cachesize / (1 * 1024)) / 10); 299 300 /* Allocate hash table space and initialize it. */ 301 if ((ret = __db_shalloc(dbmp->addr, 302 mp->htab_buckets * sizeof(DB_HASHTAB), 303 0, &dbmp->htab)) != 0) 304 goto err; 305 __db_hashinit(dbmp->htab, mp->htab_buckets); 306 mp->htab = R_OFFSET(dbmp, dbmp->htab); 307 308 ZERO_LSN(mp->lsn); 309 mp->lsn_cnt = 0; 310 311 memset(&mp->stat, 0, sizeof(mp->stat)); 312 mp->stat.st_cachesize = cachesize; 313 314 mp->flags = 0; 315 } 316 317 /* Get the local hash table address. */ 318 dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab); 319 320 UNLOCKREGION(dbmp); 321 return (0); 322 323 err: UNLOCKREGION(dbmp); 324 (void)__db_rdetach(&dbmp->reginfo); 325 if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) 326 (void)memp_unlink(path, 1, dbmp->dbenv); 327 328 if (dbmp->reginfo.path != NULL) 329 __os_freestr(dbmp->reginfo.path); 330 return (ret); 331 } 332