xref: /titanic_51/usr/src/cmd/sendmail/db/mp/mp_region.c (revision 381a2a9a387f449fab7d0c7e97c4184c26963abf)
1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997, 1998
5  *	Sleepycat Software.  All rights reserved.
6  */
7 #include "config.h"
8 
9 #ifndef lint
10 static const char sccsid[] = "@(#)mp_region.c	10.35 (Sleepycat) 12/11/98";
11 #endif /* not lint */
12 
13 #ifndef NO_SYSTEM_INCLUDES
14 #include <sys/types.h>
15 
16 #include <errno.h>
17 #include <string.h>
18 #endif
19 
20 #include "db_int.h"
21 #include "shqueue.h"
22 #include "db_shash.h"
23 #include "mp.h"
24 #include "common_ext.h"
25 
26 /*
27  * __memp_reg_alloc --
28  *	Allocate some space in the mpool region, with locking.
29  *
30  * PUBLIC: int __memp_reg_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
31  */
32 int
33 __memp_reg_alloc(dbmp, len, offsetp, retp)
34 	DB_MPOOL *dbmp;
35 	size_t len, *offsetp;
36 	void *retp;
37 {
38 	int ret;
39 
40 	LOCKREGION(dbmp);
41 	ret = __memp_alloc(dbmp, len, offsetp, retp);
42 	UNLOCKREGION(dbmp);
43 	return (ret);
44 }
45 
46 /*
47  * __memp_alloc --
48  *	Allocate some space in the mpool region.
49  *
50  * PUBLIC: int __memp_alloc __P((DB_MPOOL *, size_t, size_t *, void *));
51  */
52 int
53 __memp_alloc(dbmp, len, offsetp, retp)
54 	DB_MPOOL *dbmp;
55 	size_t len, *offsetp;
56 	void *retp;
57 {
58 	BH *bhp, *nbhp;
59 	MPOOL *mp;
60 	MPOOLFILE *mfp;
61 	size_t fsize, total;
62 	int nomore, restart, ret, wrote;
63 	void *p;
64 
65 	mp = dbmp->mp;
66 
67 	nomore = 0;
68 alloc:	if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
69 		if (offsetp != NULL)
70 			*offsetp = R_OFFSET(dbmp, p);
71 		*(void **)retp = p;
72 		return (0);
73 	}
74 	if (nomore) {
75 		__db_err(dbmp->dbenv,
76 	    "Unable to allocate %lu bytes from mpool shared region: %s\n",
77 		    (u_long)len, strerror(ret));
78 		return (ret);
79 	}
80 
81 	/* Look for a buffer on the free list that's the right size. */
82 	for (bhp =
83 	    SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
84 		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
85 
86 		if (__db_shsizeof(bhp) == len) {
87 			SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
88 			if (offsetp != NULL)
89 				*offsetp = R_OFFSET(dbmp, bhp);
90 			*(void **)retp = bhp;
91 			return (0);
92 		}
93 	}
94 
95 	/* Discard from the free list until we've freed enough memory. */
96 	total = 0;
97 	for (bhp =
98 	    SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
99 		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
100 
101 		SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
102 		__db_shalloc_free(dbmp->addr, bhp);
103 		--mp->stat.st_page_clean;
104 
105 		/*
106 		 * Retry as soon as we've freed up sufficient space.  If we
107 		 * will have to coalesce memory to satisfy the request, don't
108 		 * try until it's likely (possible?) that we'll succeed.
109 		 */
110 		total += fsize = __db_shsizeof(bhp);
111 		if (fsize >= len || total >= 3 * len)
112 			goto alloc;
113 	}
114 
115 retry:	/* Find a buffer we can flush; pure LRU. */
116 	restart = total = 0;
117 	for (bhp =
118 	    SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
119 		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
120 
121 		/* Ignore pinned or locked (I/O in progress) buffers. */
122 		if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
123 			continue;
124 
125 		/* Find the associated MPOOLFILE. */
126 		mfp = R_ADDR(dbmp, bhp->mf_offset);
127 
128 		/*
129 		 * Write the page if it's dirty.
130 		 *
131 		 * If we wrote the page, fall through and free the buffer.  We
132 		 * don't have to rewalk the list to acquire the buffer because
133 		 * it was never available for any other process to modify it.
134 		 * If we didn't write the page, but we discarded and reacquired
135 		 * the region lock, restart the buffer list walk.  If we neither
136 		 * wrote the buffer nor discarded the region lock, continue down
137 		 * the buffer list.
138 		 */
139 		if (F_ISSET(bhp, BH_DIRTY)) {
140 			++bhp->ref;
141 			if ((ret = __memp_bhwrite(dbmp,
142 			    mfp, bhp, &restart, &wrote)) != 0)
143 				return (ret);
144 			--bhp->ref;
145 
146 			/*
147 			 * It's possible that another process wants this buffer
148 			 * and incremented the ref count while we were writing
149 			 * it.
150 			 */
151 			if (bhp->ref != 0)
152 				goto retry;
153 
154 			if (wrote)
155 				++mp->stat.st_rw_evict;
156 			else {
157 				if (restart)
158 					goto retry;
159 				continue;
160 			}
161 		} else
162 			++mp->stat.st_ro_evict;
163 
164 		/*
165 		 * Check to see if the buffer is the size we're looking for.
166 		 * If it is, simply reuse it.
167 		 */
168 		total += fsize = __db_shsizeof(bhp);
169 		if (fsize == len) {
170 			__memp_bhfree(dbmp, mfp, bhp, 0);
171 
172 			if (offsetp != NULL)
173 				*offsetp = R_OFFSET(dbmp, bhp);
174 			*(void **)retp = bhp;
175 			return (0);
176 		}
177 
178 		/* Free the buffer. */
179 		__memp_bhfree(dbmp, mfp, bhp, 1);
180 
181 		/*
182 		 * Retry as soon as we've freed up sufficient space.  If we
183 		 * have to coalesce of memory to satisfy the request, don't
184 		 * try until it's likely (possible?) that we'll succeed.
185 		 */
186 		if (fsize >= len || total >= 3 * len)
187 			goto alloc;
188 
189 		/* Restart the walk if we discarded the region lock. */
190 		if (restart)
191 			goto retry;
192 	}
193 	nomore = 1;
194 	goto alloc;
195 }
196 
197 /*
198  * __memp_ropen --
199  *	Attach to, and optionally create, the mpool region.
200  *
201  * PUBLIC: int __memp_ropen
202  * PUBLIC:    __P((DB_MPOOL *, const char *, size_t, int, int, u_int32_t));
203  */
204 int
205 __memp_ropen(dbmp, path, cachesize, mode, is_private, flags)
206 	DB_MPOOL *dbmp;
207 	const char *path;
208 	size_t cachesize;
209 	int mode, is_private;
210 	u_int32_t flags;
211 {
212 	MPOOL *mp;
213 	size_t rlen;
214 	int defcache, ret;
215 
216 	/*
217 	 * Unlike other DB subsystems, mpool can't simply grow the region
218 	 * because it returns pointers into the region to its clients.  To
219 	 * "grow" the region, we'd have to allocate a new region and then
220 	 * store a region number in the structures that reference regional
221 	 * objects.  It's reasonable that we fail regardless, as clients
222 	 * shouldn't have every page in the region pinned, so the only
223 	 * "failure" mode should be a performance penalty because we don't
224 	 * find a page in the cache that we'd like to have found.
225 	 *
226 	 * Up the user's cachesize by 25% to account for our overhead.
227 	 */
228 	defcache = 0;
229 	if (cachesize < DB_CACHESIZE_MIN)
230 		if (cachesize == 0) {
231 			defcache = 1;
232 			cachesize = DB_CACHESIZE_DEF;
233 		} else
234 			cachesize = DB_CACHESIZE_MIN;
235 	rlen = cachesize + cachesize / 4;
236 
237 	/*
238 	 * Map in the region.
239 	 *
240 	 * If it's a private mpool, use malloc, it's a lot faster than
241 	 * instantiating a region.
242 	 */
243 	dbmp->reginfo.dbenv = dbmp->dbenv;
244 	dbmp->reginfo.appname = DB_APP_NONE;
245 	if (path == NULL)
246 		dbmp->reginfo.path = NULL;
247 	else
248 		if ((ret = __os_strdup(path, &dbmp->reginfo.path)) != 0)
249 			return (ret);
250 	dbmp->reginfo.file = DB_DEFAULT_MPOOL_FILE;
251 	dbmp->reginfo.mode = mode;
252 	dbmp->reginfo.size = rlen;
253 	dbmp->reginfo.dbflags = flags;
254 	dbmp->reginfo.flags = 0;
255 	if (defcache)
256 		F_SET(&dbmp->reginfo, REGION_SIZEDEF);
257 
258 	/*
259 	 * If we're creating a temporary region, don't use any standard
260 	 * naming.
261 	 */
262 	if (is_private) {
263 		dbmp->reginfo.appname = DB_APP_TMP;
264 		dbmp->reginfo.file = NULL;
265 		F_SET(&dbmp->reginfo, REGION_PRIVATE);
266 	}
267 
268 	if ((ret = __db_rattach(&dbmp->reginfo)) != 0) {
269 		if (dbmp->reginfo.path != NULL)
270 			__os_freestr(dbmp->reginfo.path);
271 		return (ret);
272 	}
273 
274 	/*
275 	 * The MPOOL structure is first in the region, the rest of the region
276 	 * is free space.
277 	 */
278 	dbmp->mp = dbmp->reginfo.addr;
279 	dbmp->addr = (u_int8_t *)dbmp->mp + sizeof(MPOOL);
280 
281 	/* Initialize a created region. */
282 	if (F_ISSET(&dbmp->reginfo, REGION_CREATED)) {
283 		mp = dbmp->mp;
284 		SH_TAILQ_INIT(&mp->bhq);
285 		SH_TAILQ_INIT(&mp->bhfq);
286 		SH_TAILQ_INIT(&mp->mpfq);
287 
288 		__db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));
289 
290 		/*
291 		 * Assume we want to keep the hash chains with under 10 pages
292 		 * on each chain.  We don't know the pagesize in advance, and
293 		 * it may differ for different files.  Use a pagesize of 1K for
294 		 * the calculation -- we walk these chains a lot, they should
295 		 * be short.
296 		 */
297 		mp->htab_buckets =
298 		    __db_tablesize((cachesize / (1 * 1024)) / 10);
299 
300 		/* Allocate hash table space and initialize it. */
301 		if ((ret = __db_shalloc(dbmp->addr,
302 		    mp->htab_buckets * sizeof(DB_HASHTAB),
303 		    0, &dbmp->htab)) != 0)
304 			goto err;
305 		__db_hashinit(dbmp->htab, mp->htab_buckets);
306 		mp->htab = R_OFFSET(dbmp, dbmp->htab);
307 
308 		ZERO_LSN(mp->lsn);
309 		mp->lsn_cnt = 0;
310 
311 		memset(&mp->stat, 0, sizeof(mp->stat));
312 		mp->stat.st_cachesize = cachesize;
313 
314 		mp->flags = 0;
315 	}
316 
317 	/* Get the local hash table address. */
318 	dbmp->htab = R_ADDR(dbmp, dbmp->mp->htab);
319 
320 	UNLOCKREGION(dbmp);
321 	return (0);
322 
323 err:	UNLOCKREGION(dbmp);
324 	(void)__db_rdetach(&dbmp->reginfo);
325 	if (F_ISSET(&dbmp->reginfo, REGION_CREATED))
326 		(void)memp_unlink(path, 1, dbmp->dbenv);
327 
328 	if (dbmp->reginfo.path != NULL)
329 		__os_freestr(dbmp->reginfo.path);
330 	return (ret);
331 }
332