/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998
 *	Sleepycat Software.  All rights reserved.
 */
#include "config.h"

#ifndef lint
static const char sccsid[] = "@(#)mp_region.c	10.35 (Sleepycat) 12/11/98";
#endif /* not lint */

#ifndef NO_SYSTEM_INCLUDES
#include <sys/types.h>

#include <errno.h>
#include <string.h>
#endif

#include "db_int.h"
#include "shqueue.h"
#include "db_shash.h"
#include "mp.h"
#include "common_ext.h"

/*
 * __memp_reg_alloc --
 *	Allocate some space in the mpool region, with locking.
 *
 * PUBLIC: int __memp_reg_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
 */
int
__memp_reg_alloc(dbmp, len, offsetp, retp)
	DB_MPOOL *dbmp;
	size_t len, *offsetp;
	void *retp;
{
	int ret;

	LOCKREGION(dbmp);
	ret = __memp_alloc(dbmp, len, offsetp, retp);
	UNLOCKREGION(dbmp);
	return (ret);
}
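
/*
 * Usage sketch (a hypothetical caller; everything but the allocator's own
 * names is illustrative): code that needs space visible to other processes
 * asks for both the local pointer and the region-relative offset, e.g.
 *
 *	size_t offset;
 *	void *p;
 *	int ret;
 *
 *	if ((ret = __memp_reg_alloc(dbmp, len, &offset, &p)) != 0)
 *		return (ret);
 *
 * The offset (R_OFFSET of the returned pointer) is what belongs in shared
 * structures, since each process may map the region at a different address;
 * offsetp may be NULL when only the local pointer is needed.
 */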

/*
 * __memp_alloc --
 *	Allocate some space in the mpool region.
 *
 * PUBLIC: int __memp_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
 */
int
__memp_alloc(dbmp, len, offsetp, retp)
	DB_MPOOL *dbmp;
	size_t len, *offsetp;
	void *retp;
{
	BH *bhp, *nbhp;
	MPOOL *mp;
	MPOOLFILE *mfp;
	size_t fsize, total;
	int nomore, restart, ret, wrote;
	void *p;

	mp = dbmp->mp;

	nomore = 0;
alloc:	if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
		if (offsetp != NULL)
			*offsetp = R_OFFSET(dbmp, p);
		*(void **)retp = p;
		return (0);
	}
	if (nomore) {
		__db_err(dbmp->dbenv,
		    "Unable to allocate %lu bytes from mpool shared region: %s\n",
		    (u_long)len, strerror(ret));
		return (ret);
	}

	/* Look for a buffer on the free list that's the right size. */
	for (bhp =
	    SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);

		if (__db_shsizeof(bhp) == len) {
			SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
			if (offsetp != NULL)
				*offsetp = R_OFFSET(dbmp, bhp);
			*(void **)retp = bhp;
			return (0);
		}
	}

	/* Discard from the free list until we've freed enough memory. */
	total = 0;
	for (bhp =
	    SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);

		SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
		__db_shalloc_free(dbmp->addr, bhp);
		--mp->stat.st_page_clean;

		/*
		 * Retry as soon as we've freed up sufficient space.  If we
		 * will have to coalesce memory to satisfy the request, don't
		 * try until it's likely (possible?) that we'll succeed.
		 */
		total += fsize = __db_shsizeof(bhp);
		if (fsize >= len || total >= 3 * len)
			goto alloc;
	}
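
	/*
	 * (A concrete example of the heuristic above, with hypothetical
	 * numbers: for a 4KB request we retry the allocation as soon as a
	 * single freed chunk is at least 4KB, or once roughly 12KB in total
	 * has been returned, at which point coalescing adjacent free chunks
	 * may be able to satisfy it.)
	 */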

retry:	/* Find a buffer we can flush; pure LRU. */
	restart = total = 0;
	for (bhp =
	    SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);

		/* Ignore pinned or locked (I/O in progress) buffers. */
		if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
			continue;

		/* Find the associated MPOOLFILE. */
		mfp = R_ADDR(dbmp, bhp->mf_offset);

		/*
		 * Write the page if it's dirty.
		 *
		 * If we wrote the page, fall through and free the buffer.
		 * We don't have to rewalk the list to acquire the buffer,
		 * because it was never available for any other process to
		 * modify.  If we didn't write the page, but we discarded
		 * and reacquired the region lock, restart the buffer list
		 * walk.  If we neither wrote the buffer nor discarded the
		 * region lock, continue down the buffer list.
		 */
		if (F_ISSET(bhp, BH_DIRTY)) {
			++bhp->ref;
			if ((ret = __memp_bhwrite(dbmp,
			    mfp, bhp, &restart, &wrote)) != 0)
				return (ret);
			--bhp->ref;

			/*
			 * It's possible that another process wants this
			 * buffer and incremented the ref count while we
			 * were writing it.
			 */
			if (bhp->ref != 0)
				goto retry;

			if (wrote)
				++mp->stat.st_rw_evict;
			else {
				if (restart)
					goto retry;
				continue;
			}
		} else
			++mp->stat.st_ro_evict;

		/*
		 * Check to see if the buffer is the size we're looking for.
		 * If it is, simply reuse it.
		 */
		total += fsize = __db_shsizeof(bhp);
		if (fsize == len) {
			__memp_bhfree(dbmp, mfp, bhp, 0);

			if (offsetp != NULL)
				*offsetp = R_OFFSET(dbmp, bhp);
			*(void **)retp = bhp;
			return (0);
		}

		/* Free the buffer. */
		__memp_bhfree(dbmp, mfp, bhp, 1);

		/*
		 * Retry as soon as we've freed up sufficient space.  If we
		 * have to coalesce memory to satisfy the request, don't try
		 * until it's likely (possible?) that we'll succeed.
		 */
		if (fsize >= len || total >= 3 * len)
			goto alloc;

		/* Restart the walk if we discarded the region lock. */
		if (restart)
			goto retry;
	}
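	/*
	 * Nothing else can be discarded: make one final allocation attempt
	 * and, if it fails, report the error (see the nomore test after the
	 * alloc label above).
	 */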
	nomore = 1;
	goto alloc;
}

/*
 * __memp_ropen --
 *	Attach to, and optionally create, the mpool region.
 *
 * PUBLIC: int __memp_ropen
 * PUBLIC:    __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t));
 */
int
__memp_ropen(dbmp, path, cachesize, mode, is_private, flags)
	DB_MPOOL *dbmp;
	const char *path;
	size_t cachesize;
	int mode, is_private;
	u_int32_t flags;
{
	MPOOL *mp;
	size_t rlen;
	int defcache, ret;

	/*
	 * Unlike other DB subsystems, mpool can't simply grow the region
	 * because it returns pointers into the region to its clients.  To
	 * "grow" the region, we'd have to allocate a new region and then
	 * store a region number in the structures that reference regional
	 * objects.  It's reasonable simply to fail instead: clients
	 * shouldn't have every page in the region pinned, so the only
	 * "failure" mode should be a performance penalty because we don't
	 * find a page in the cache that we'd like to have found.
	 *
	 * Up the user's cachesize by 25% to account for our overhead.
	 */
	defcache = 0;
	if (cachesize < DB_CACHESIZE_MIN)
		if (cachesize == 0) {
			defcache = 1;
			cachesize = DB_CACHESIZE_DEF;
		} else
			cachesize = DB_CACHESIZE_MIN;
	rlen = cachesize + cachesize / 4;
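	/*
	 * (For example, a 1MB cachesize yields a 1.25MB region; the extra
	 * 25% roughly covers buffer headers, per-file structures, the hash
	 * table and allocator bookkeeping.)
	 */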

	/*
	 * Map in the region.
	 *
	 * If it's a private mpool, use malloc; it's a lot faster than
	 * instantiating a region.
	 */
	dbmp->reginfo.dbenv = dbmp->dbenv;
	dbmp->reginfo.appname = DB_APP_NONE;
	if (path == NULL)
		dbmp->reginfo.path = NULL;
	else
		if ((ret = __os_strdup(path, &dbmp->reginfo.path)) != 0)
			return (ret);
	dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE;
	dbmp->reginfo.mode = mode;
	dbmp->reginfo.size = rlen;
	dbmp->reginfo.dbflags = flags;
	dbmp->reginfo.flags = 0;
	if (defcache)
		F_SET(&dbmp->reginfo, REGION_SIZEDEF);

	/*
	 * If we're creating a temporary region, don't use any standard
	 * naming.
	 */
	if (is_private) {
		dbmp->reginfo.appname = DB_APP_TMP;
		dbmp->reginfo.file = NULL;
		F_SET(&dbmp->reginfo, REGION_PRIVATE);
	}

	if ((ret = __db_rattach(&dbmp->reginfo)) != 0) {
		if (dbmp->reginfo.path != NULL)
			__os_freestr(dbmp->reginfo.path);
		return (ret);
	}

	/*
	 * The MPOOL structure is first in the region, the rest of the region
	 * is free space.
	 */
	dbmp->mp = dbmp->reginfo.addr;
	dbmp->addr = (u_int8_t *)dbmp->mp + sizeof(MPOOL);

	/* Initialize a created region. */
	if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) {
		mp = dbmp->mp;
		SH_TAILQ_INIT(&mp->bhq);
		SH_TAILQ_INIT(&mp->bhfq);
		SH_TAILQ_INIT(&mp->mpfq);

		__db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));

		/*
		 * Assume we want to keep the hash chains to fewer than 10
		 * pages each.  We don't know the pagesize in advance, and
		 * it may differ for different files.  Use a pagesize of 1K
		 * for the calculation -- we walk these chains a lot, they
		 * should be short.
		 */
		mp->htab_buckets =
		    __db_tablesize((cachesize / (1 * 1024)) / 10);
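		/*
		 * (With the numbers above, a 1MB cache is treated as 1024
		 * 1K pages, so roughly 102 buckets are requested;
		 * __db_tablesize then adjusts that to a suitable table
		 * size.)
		 */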

		/* Allocate hash table space and initialize it. */
		if ((ret = __db_shalloc(dbmp->addr,
		    mp->htab_buckets * sizeof(DB_HASHTAB),
		    0, &dbmp->htab)) != 0)
			goto err;
		__db_hashinit(dbmp->htab, mp->htab_buckets);
		mp->htab = R_OFFSET(dbmp, dbmp->htab);
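		/*
		 * (Note that the shared MPOOL stores the table as a region
		 * offset rather than an address: each process maps the
		 * region at its own address and converts the offset back to
		 * a local pointer with R_ADDR, as done below.)
		 */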

		ZERO_LSN(mp->lsn);
		mp->lsn_cnt = 0;

		memset(&mp->stat, 0, sizeof(mp->stat));
		mp->stat.st_cachesize = cachesize;

		mp->flags = 0;
	}

	/* Get the local hash table address. */
	dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab);

	UNLOCKREGION(dbmp);
	return (0);

	/*
	 * Error: we get here only after a successful attach.  Release the
	 * region lock, detach and, if this call created the region, remove
	 * it as well; then free the copied path and return the error.
	 */
err:	UNLOCKREGION(dbmp);
	(void)__db_rdetach(&dbmp->reginfo);
	if (F_ISSET(&dbmp->reginfo, REGION_CREATED))
		(void)memp_unlink(path, 1, dbmp->dbenv);

	if (dbmp->reginfo.path != NULL)
		__os_freestr(dbmp->reginfo.path);
	return (ret);
}