1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
6 */
7
8 #include "config.h"
9
10 #ifndef lint
11 static const char sccsid[] = "@(#)os_map.c 10.24 (Sleepycat) 10/12/98";
12 #endif /* not lint */
13
14 #ifndef NO_SYSTEM_INCLUDES
15 #include <sys/types.h>
16 #ifdef HAVE_MMAP
17 #include <sys/mman.h>
18 #endif
19
20 #ifdef HAVE_SHMGET
21 #include <sys/ipc.h>
22 #include <sys/shm.h>
23 #endif
24
25 #include <errno.h>
26 #include <string.h>
27 #endif
28
29 #include "db_int.h"
30 #include "os_jump.h"
31 #include "common_ext.h"
32
33 #ifdef HAVE_MMAP
34 static int __os_map __P((char *, int, size_t, int, int, int, void **));
35 #endif
36 #ifdef HAVE_SHMGET
37 static int __os_shmget __P((REGINFO *));
38 #endif
39
/*
 * __db_mapanon_ok --
 *	Report whether this build can supply anonymous memory regions.
 *
 *	need_names is non-zero when the application wants to share the
 *	anonymous memory among multiple processes, in which case the
 *	memory has to be nameable.
 *
 *	Returns 0 if such a region can be supported, EINVAL if it cannot.
 *
 * PUBLIC: int __db_mapanon_ok __P((int));
 */
int
__db_mapanon_ok(need_names)
	int need_names;
{
	int supported;

	supported = 0;

	/*
	 * Without spinlocks we need a file descriptor for fcntl(2) locking,
	 * and that implies mmap(2) of a regular file, so anonymous regions
	 * are only considered when spinlocks are available.
	 *
	 * shmget(2) can name anonymous memory, so it satisfies either kind
	 * of request.  Anonymous mmap(2) memory cannot be named, so it is
	 * only good enough when the caller does not need names.
	 */
#ifdef HAVE_SPINLOCKS
#ifdef HAVE_SHMGET
	supported = 1;
#endif
#ifdef HAVE_MMAP
#if defined(MAP_ANON) || defined(MAP_ANONYMOUS)
	if (need_names == 0)
		supported = 1;
#endif
#else
	COMPQUIET(need_names, 0);
#endif /* HAVE_MMAP */
#endif /* HAVE_SPINLOCKS */

	return (supported ? 0 : EINVAL);
}
85
/*
 * __db_mapinit --
 *	Report whether shared regions must be initialized before use.
 *
 *	Returns 1 if the library was configured with REGION_INIT_NEEDED,
 *	0 otherwise.
 *
 * PUBLIC: int __db_mapinit __P((void));
 */
int
__db_mapinit()
{
	int init_needed;

	/*
	 * Some historic systems required every byte of a region to be
	 * written before the region could be mmapped and accessed randomly.
	 * REGION_INIT_NEEDED is the configuration-time switch for running
	 * on such a system.
	 */
#ifdef REGION_INIT_NEEDED
	init_needed = 1;
#else
	init_needed = 0;
#endif
	return (init_needed);
}
107
108 /*
109 * __db_mapregion --
110 * Attach to a shared memory region.
111 *
112 * PUBLIC: int __db_mapregion __P((char *, REGINFO *));
113 */
114 int
__db_mapregion(path,infop)115 __db_mapregion(path, infop)
116 char *path;
117 REGINFO *infop;
118 {
119 int called, ret;
120
121 called = 0;
122 ret = EINVAL;
123
124 /* If the user replaces the map call, call through their interface. */
125 if (__db_jump.j_map != NULL) {
126 F_SET(infop, REGION_HOLDINGSYS);
127 return (__db_jump.j_map(path, infop->fd, infop->size,
128 1, F_ISSET(infop, REGION_ANONYMOUS), 0, &infop->addr));
129 }
130
131 if (F_ISSET(infop, REGION_ANONYMOUS)) {
132 /*
133 * !!!
134 * If we're creating anonymous regions:
135 *
136 * If it's private, we use mmap(2). The problem with using
137 * shmget(2) is that we may be creating a region of which the
138 * application isn't aware, and if the application crashes
139 * we'll have no way to remove the system resources for the
140 * region.
141 *
142 * If it's not private, we use the shmget(2) interface if it's
143 * available, because it allows us to name anonymous memory.
144 * If shmget(2) isn't available, use the mmap(2) calls.
145 *
146 * In the case of anonymous memory, using mmap(2) means the
147 * memory isn't named and only the single process and its
148 * threads can access the region.
149 */
150 #ifdef HAVE_MMAP
151 #ifdef MAP_ANON
152 #define HAVE_MMAP_ANONYMOUS 1
153 #else
154 #ifdef MAP_ANONYMOUS
155 #define HAVE_MMAP_ANONYMOUS 1
156 #endif
157 #endif
158 #endif
159 #ifdef HAVE_MMAP_ANONYMOUS
160 if (!called && F_ISSET(infop, REGION_PRIVATE)) {
161 called = 1;
162 ret = __os_map(path,
163 infop->fd, infop->size, 1, 1, 0, &infop->addr);
164 }
165 #endif
166 #ifdef HAVE_SHMGET
167 if (!called) {
168 called = 1;
169 ret = __os_shmget(infop);
170 }
171 #endif
172 #ifdef HAVE_MMAP
173 /*
174 * If we're trying to join an unnamed anonymous region, fail --
175 * that's not possible.
176 */
177 if (!called) {
178 called = 1;
179
180 if (!F_ISSET(infop, REGION_CREATED)) {
181 __db_err(infop->dbenv,
182 "cannot join region in unnamed anonymous memory");
183 return (EINVAL);
184 }
185
186 ret = __os_map(path,
187 infop->fd, infop->size, 1, 1, 0, &infop->addr);
188 }
189 #endif
190 } else {
191 /*
192 * !!!
193 * If we're creating normal regions, we use the mmap(2)
194 * interface if it's available because it's POSIX 1003.1
195 * standard and we trust it more than we do shmget(2).
196 */
197 #ifdef HAVE_MMAP
198 if (!called) {
199 called = 1;
200
201 /* Mmap(2) regions that aren't anonymous can grow. */
202 F_SET(infop, REGION_CANGROW);
203
204 ret = __os_map(path,
205 infop->fd, infop->size, 1, 0, 0, &infop->addr);
206 }
207 #endif
208 #ifdef HAVE_SHMGET
209 if (!called) {
210 called = 1;
211 ret = __os_shmget(infop);
212 }
213 #endif
214 }
215 return (ret);
216 }
217
218 /*
219 * __db_unmapregion --
220 * Detach from the shared memory region.
221 *
222 * PUBLIC: int __db_unmapregion __P((REGINFO *));
223 */
224 int
__db_unmapregion(infop)225 __db_unmapregion(infop)
226 REGINFO *infop;
227 {
228 int called, ret;
229
230 called = 0;
231 ret = EINVAL;
232
233 if (__db_jump.j_unmap != NULL)
234 return (__db_jump.j_unmap(infop->addr, infop->size));
235
236 #ifdef HAVE_SHMGET
237 if (infop->segid != INVALID_SEGID) {
238 called = 1;
239 ret = shmdt(infop->addr) ? errno : 0;
240 }
241 #endif
242 #ifdef HAVE_MMAP
243 if (!called) {
244 called = 1;
245 ret = munmap(infop->addr, infop->size) ? errno : 0;
246 }
247 #endif
248 return (ret);
249 }
250
251 /*
252 * __db_unlinkregion --
253 * Remove the shared memory region.
254 *
255 * PUBLIC: int __db_unlinkregion __P((char *, REGINFO *));
256 */
257 int
__db_unlinkregion(name,infop)258 __db_unlinkregion(name, infop)
259 char *name;
260 REGINFO *infop;
261 {
262 int called, ret;
263
264 called = 0;
265 ret = EINVAL;
266
267 if (__db_jump.j_runlink != NULL)
268 return (__db_jump.j_runlink(name));
269
270 #ifdef HAVE_SHMGET
271 if (infop->segid != INVALID_SEGID) {
272 called = 1;
273 ret = shmctl(infop->segid, IPC_RMID, NULL) ? errno : 0;
274 }
275 #endif
276 #ifdef HAVE_MMAP
277 COMPQUIET(infop, NULL);
278 if (!called) {
279 called = 1;
280 ret = 0;
281 }
282 #endif
283 return (ret);
284 }
285
286 /*
287 * __db_mapfile --
288 * Map in a shared memory file.
289 *
290 * PUBLIC: int __db_mapfile __P((char *, int, size_t, int, void **));
291 */
292 int
__db_mapfile(path,fd,len,is_rdonly,addr)293 __db_mapfile(path, fd, len, is_rdonly, addr)
294 char *path;
295 int fd, is_rdonly;
296 size_t len;
297 void **addr;
298 {
299 if (__db_jump.j_map != NULL)
300 return (__db_jump.j_map(path, fd, len, 0, 0, is_rdonly, addr));
301
302 #ifdef HAVE_MMAP
303 return (__os_map(path, fd, len, 0, 0, is_rdonly, addr));
304 #else
305 return (EINVAL);
306 #endif
307 }
308
309 /*
310 * __db_unmapfile --
311 * Unmap the shared memory file.
312 *
313 * PUBLIC: int __db_unmapfile __P((void *, size_t));
314 */
315 int
__db_unmapfile(addr,len)316 __db_unmapfile(addr, len)
317 void *addr;
318 size_t len;
319 {
320 if (__db_jump.j_unmap != NULL)
321 return (__db_jump.j_unmap(addr, len));
322
323 #ifdef HAVE_MMAP
324 return (munmap(addr, len) ? errno : 0);
325 #else
326 return (EINVAL);
327 #endif
328 }
329
#ifdef HAVE_MMAP
/*
 * __os_map --
 *	Call the mmap(2) function.
 *
 *	Maps len bytes starting at offset 0.  An anonymous region
 *	(is_region and is_anonymous both set) is mapped without a file;
 *	anything else is mapped from fd.  On success the address is stored
 *	through addr and 0 is returned; on failure errno is returned.
 */
static int
__os_map(path, fd, len, is_region, is_anonymous, is_rdonly, addr)
	char *path;
	int fd, is_region, is_anonymous, is_rdonly;
	size_t len;
	void **addr;
{
	void *mapped;
	int map_flags, map_prot;

	COMPQUIET(path, NULL);

	/*
	 * Read-only maps are private, everything else is shared; there is
	 * no separate parameter for it.
	 *
	 * MAP_HASSEMAPHORE tells the system the region contains semaphores.
	 * No known system implements it, but it's a reasonable thing to do
	 * where it exists.  It makes no sense for read-only files, so it is
	 * only set for writable maps.
	 */
	if (is_rdonly) {
		map_flags = MAP_PRIVATE;
		map_prot = PROT_READ;
	} else {
		map_flags = MAP_SHARED;
		map_prot = PROT_READ | PROT_WRITE;
#ifdef MAP_HASSEMAPHORE
		map_flags |= MAP_HASSEMAPHORE;
#endif
	}

	if (is_region && is_anonymous) {
		/*
		 * BSD derived systems spell the flag MAP_ANON; Digital Unix
		 * and HP/UX spell it MAP_ANONYMOUS.  No file descriptor is
		 * used for an anonymous map.
		 */
#ifdef MAP_ANON
		map_flags |= MAP_ANON;
#endif
#ifdef MAP_ANONYMOUS
		map_flags |= MAP_ANONYMOUS;
#endif
		fd = -1;
	}
#ifdef MAP_FILE
	else {
		/*
		 * Historically, MAP_FILE was required to map regular files
		 * even though it was the default.  Some systems have it,
		 * some don't, and some define it as 0.
		 */
		map_flags |= MAP_FILE;
	}
#endif

	/*
	 * XXX
	 * Work around a bug in the VMS V7.1 mmap() implementation.  To map
	 * a file on VMS it has to have been opened in a certain way, so the
	 * VMS mmap() closes and re-opens the file -- without flushing any
	 * caches to disk first.  If the cached data never reaches the disk
	 * the file is smaller than the requested map and mmap() fails.
	 * Calling fsync() first avoids the problem.  DEC doesn't consider
	 * this a bug, citing XPG5 language about user responsibility for
	 * on-disk and in-memory synchronization.
	 */
#ifdef VMS
	if (__os_fsync(fd) == -1)
		return(errno);
#endif

	/* MAP_FAILED was not defined in early mmap implementations. */
#ifndef MAP_FAILED
#define	MAP_FAILED	-1
#endif
	mapped = mmap(NULL, len, map_prot, map_flags, fd, (off_t)0);
	if (mapped == (void *)MAP_FAILED)
		return (errno);

	*addr = mapped;
	return (0);
}
#endif
420
#ifdef HAVE_SHMGET
/*
 * __os_shmget --
 *	Call the shmget(2) family of functions.
 *
 *	If the region is being created (REGION_CREATED is set), a new
 *	segment of infop->size bytes is allocated and its ID stored in
 *	infop->segid.  In either case the segment named by infop->segid is
 *	then attached and its address stored in infop->addr.
 *
 *	Returns 0 on success and sets REGION_HOLDINGSYS; otherwise returns
 *	errno from the failing call, or EAGAIN if attaching to an existing
 *	region failed.
 */
static int
__os_shmget(infop)
	REGINFO *infop;
{
	/*
	 * IPC_PRIVATE is a key value, not a flag: it belongs in the key
	 * argument, and the flags argument carries only the permission
	 * bits.  A segment is always created for the IPC_PRIVATE key.
	 */
	if (F_ISSET(infop, REGION_CREATED) &&
	    (infop->segid = shmget(IPC_PRIVATE, infop->size, 0600)) == -1)
		return (errno);

	if ((infop->addr = shmat(infop->segid, NULL, 0)) == (void *)-1) {
		/*
		 * If we're trying to join the region and failing, assume
		 * that there was a reboot and the region no longer exists.
		 */
		if (!F_ISSET(infop, REGION_CREATED))
			errno = EAGAIN;
		return (errno);
	}

	F_SET(infop, REGION_HOLDINGSYS);
	return (0);
}
#endif
448