1 /*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996, 1997, 1998 5 * Sleepycat Software. All rights reserved. 6 */ 7 8 #include "config.h" 9 10 #ifndef lint 11 static const char sccsid[] = "@(#)os_map.c 10.24 (Sleepycat) 10/12/98"; 12 #endif /* not lint */ 13 14 #ifndef NO_SYSTEM_INCLUDES 15 #include <sys/types.h> 16 #ifdef HAVE_MMAP 17 #include <sys/mman.h> 18 #endif 19 20 #ifdef HAVE_SHMGET 21 #include <sys/ipc.h> 22 #include <sys/shm.h> 23 #endif 24 25 #include <errno.h> 26 #include <string.h> 27 #endif 28 29 #include "db_int.h" 30 #include "os_jump.h" 31 #include "common_ext.h" 32 33 #ifdef HAVE_MMAP 34 static int __os_map __P((char *, int, size_t, int, int, int, void **)); 35 #endif 36 #ifdef HAVE_SHMGET 37 static int __os_shmget __P((REGINFO *)); 38 #endif 39 40 /* 41 * __db_mapanon_ok -- 42 * Return if this OS can support anonymous memory regions. 43 * 44 * PUBLIC: int __db_mapanon_ok __P((int)); 45 */ 46 int 47 __db_mapanon_ok(need_names) 48 int need_names; 49 { 50 int ret; 51 52 ret = EINVAL; 53 54 /* 55 * If we don't have spinlocks, we have to have a file descriptor 56 * for fcntl(2) locking, which implies using mmap(2) to map in a 57 * regular file. Theoretically, we could probably find ways to 58 * get a file descriptor to lock other types of shared regions, 59 * but I don't see any reason to do so. 60 * 61 * If need_names is set, the application wants to share anonymous 62 * memory among multiple processes, so we have to have a way to 63 * name it. This requires shmget(2), on UNIX systems. 64 */ 65 #ifdef HAVE_SPINLOCKS 66 #ifdef HAVE_SHMGET 67 ret = 0; 68 #endif 69 #ifdef HAVE_MMAP 70 #ifdef MAP_ANON 71 if (!need_names) 72 ret = 0; 73 #endif 74 #ifdef MAP_ANONYMOUS 75 if (!need_names) 76 ret = 0; 77 #endif 78 #else 79 COMPQUIET(need_names, 0); 80 #endif /* HAVE_MMAP */ 81 #endif /* HAVE_SPINLOCKS */ 82 83 return (ret); 84 } 85 86 /* 87 * __db_mapinit -- 88 * Return if shared regions need to be initialized. 89 * 90 * PUBLIC: int __db_mapinit __P((void)); 91 */ 92 int 93 __db_mapinit() 94 { 95 /* 96 * Historically, some systems required that all of the bytes of the 97 * region be written before it could be mmapped and accessed randomly. 98 * We have the option of setting REGION_INIT_NEEDED at configuration 99 * time if we're running on one of those systems. 100 */ 101 #ifdef REGION_INIT_NEEDED 102 return (1); 103 #else 104 return (0); 105 #endif 106 } 107 108 /* 109 * __db_mapregion -- 110 * Attach to a shared memory region. 111 * 112 * PUBLIC: int __db_mapregion __P((char *, REGINFO *)); 113 */ 114 int 115 __db_mapregion(path, infop) 116 char *path; 117 REGINFO *infop; 118 { 119 int called, ret; 120 121 called = 0; 122 ret = EINVAL; 123 124 /* If the user replaces the map call, call through their interface. */ 125 if (__db_jump.j_map != NULL) { 126 F_SET(infop, REGION_HOLDINGSYS); 127 return (__db_jump.j_map(path, infop->fd, infop->size, 128 1, F_ISSET(infop, REGION_ANONYMOUS), 0, &infop->addr)); 129 } 130 131 if (F_ISSET(infop, REGION_ANONYMOUS)) { 132 /* 133 * !!! 134 * If we're creating anonymous regions: 135 * 136 * If it's private, we use mmap(2). The problem with using 137 * shmget(2) is that we may be creating a region of which the 138 * application isn't aware, and if the application crashes 139 * we'll have no way to remove the system resources for the 140 * region. 141 * 142 * If it's not private, we use the shmget(2) interface if it's 143 * available, because it allows us to name anonymous memory. 144 * If shmget(2) isn't available, use the mmap(2) calls. 145 * 146 * In the case of anonymous memory, using mmap(2) means the 147 * memory isn't named and only the single process and its 148 * threads can access the region. 149 */ 150 #ifdef HAVE_MMAP 151 #ifdef MAP_ANON 152 #define HAVE_MMAP_ANONYMOUS 1 153 #else 154 #ifdef MAP_ANONYMOUS 155 #define HAVE_MMAP_ANONYMOUS 1 156 #endif 157 #endif 158 #endif 159 #ifdef HAVE_MMAP_ANONYMOUS 160 if (!called && F_ISSET(infop, REGION_PRIVATE)) { 161 called = 1; 162 ret = __os_map(path, 163 infop->fd, infop->size, 1, 1, 0, &infop->addr); 164 } 165 #endif 166 #ifdef HAVE_SHMGET 167 if (!called) { 168 called = 1; 169 ret = __os_shmget(infop); 170 } 171 #endif 172 #ifdef HAVE_MMAP 173 /* 174 * If we're trying to join an unnamed anonymous region, fail -- 175 * that's not possible. 176 */ 177 if (!called) { 178 called = 1; 179 180 if (!F_ISSET(infop, REGION_CREATED)) { 181 __db_err(infop->dbenv, 182 "cannot join region in unnamed anonymous memory"); 183 return (EINVAL); 184 } 185 186 ret = __os_map(path, 187 infop->fd, infop->size, 1, 1, 0, &infop->addr); 188 } 189 #endif 190 } else { 191 /* 192 * !!! 193 * If we're creating normal regions, we use the mmap(2) 194 * interface if it's available because it's POSIX 1003.1 195 * standard and we trust it more than we do shmget(2). 196 */ 197 #ifdef HAVE_MMAP 198 if (!called) { 199 called = 1; 200 201 /* Mmap(2) regions that aren't anonymous can grow. */ 202 F_SET(infop, REGION_CANGROW); 203 204 ret = __os_map(path, 205 infop->fd, infop->size, 1, 0, 0, &infop->addr); 206 } 207 #endif 208 #ifdef HAVE_SHMGET 209 if (!called) { 210 called = 1; 211 ret = __os_shmget(infop); 212 } 213 #endif 214 } 215 return (ret); 216 } 217 218 /* 219 * __db_unmapregion -- 220 * Detach from the shared memory region. 221 * 222 * PUBLIC: int __db_unmapregion __P((REGINFO *)); 223 */ 224 int 225 __db_unmapregion(infop) 226 REGINFO *infop; 227 { 228 int called, ret; 229 230 called = 0; 231 ret = EINVAL; 232 233 if (__db_jump.j_unmap != NULL) 234 return (__db_jump.j_unmap(infop->addr, infop->size)); 235 236 #ifdef HAVE_SHMGET 237 if (infop->segid != INVALID_SEGID) { 238 called = 1; 239 ret = shmdt(infop->addr) ? errno : 0; 240 } 241 #endif 242 #ifdef HAVE_MMAP 243 if (!called) { 244 called = 1; 245 ret = munmap(infop->addr, infop->size) ? errno : 0; 246 } 247 #endif 248 return (ret); 249 } 250 251 /* 252 * __db_unlinkregion -- 253 * Remove the shared memory region. 254 * 255 * PUBLIC: int __db_unlinkregion __P((char *, REGINFO *)); 256 */ 257 int 258 __db_unlinkregion(name, infop) 259 char *name; 260 REGINFO *infop; 261 { 262 int called, ret; 263 264 called = 0; 265 ret = EINVAL; 266 267 if (__db_jump.j_runlink != NULL) 268 return (__db_jump.j_runlink(name)); 269 270 #ifdef HAVE_SHMGET 271 if (infop->segid != INVALID_SEGID) { 272 called = 1; 273 ret = shmctl(infop->segid, IPC_RMID, NULL) ? errno : 0; 274 } 275 #endif 276 #ifdef HAVE_MMAP 277 COMPQUIET(infop, NULL); 278 if (!called) { 279 called = 1; 280 ret = 0; 281 } 282 #endif 283 return (ret); 284 } 285 286 /* 287 * __db_mapfile -- 288 * Map in a shared memory file. 289 * 290 * PUBLIC: int __db_mapfile __P((char *, int, size_t, int, void **)); 291 */ 292 int 293 __db_mapfile(path, fd, len, is_rdonly, addr) 294 char *path; 295 int fd, is_rdonly; 296 size_t len; 297 void **addr; 298 { 299 if (__db_jump.j_map != NULL) 300 return (__db_jump.j_map(path, fd, len, 0, 0, is_rdonly, addr)); 301 302 #ifdef HAVE_MMAP 303 return (__os_map(path, fd, len, 0, 0, is_rdonly, addr)); 304 #else 305 return (EINVAL); 306 #endif 307 } 308 309 /* 310 * __db_unmapfile -- 311 * Unmap the shared memory file. 312 * 313 * PUBLIC: int __db_unmapfile __P((void *, size_t)); 314 */ 315 int 316 __db_unmapfile(addr, len) 317 void *addr; 318 size_t len; 319 { 320 if (__db_jump.j_unmap != NULL) 321 return (__db_jump.j_unmap(addr, len)); 322 323 #ifdef HAVE_MMAP 324 return (munmap(addr, len) ? errno : 0); 325 #else 326 return (EINVAL); 327 #endif 328 } 329 330 #ifdef HAVE_MMAP 331 /* 332 * __os_map -- 333 * Call the mmap(2) function. 334 */ 335 static int 336 __os_map(path, fd, len, is_region, is_anonymous, is_rdonly, addr) 337 char *path; 338 int fd, is_region, is_anonymous, is_rdonly; 339 size_t len; 340 void **addr; 341 { 342 void *p; 343 int flags, prot; 344 345 COMPQUIET(path, NULL); 346 347 /* 348 * If it's read-only, it's private, and if it's not, it's shared. 349 * Don't bother with an additional parameter. 350 */ 351 flags = is_rdonly ? MAP_PRIVATE : MAP_SHARED; 352 353 if (is_region && is_anonymous) { 354 /* 355 * BSD derived systems use MAP_ANON; Digital Unix and HP/UX 356 * use MAP_ANONYMOUS. 357 */ 358 #ifdef MAP_ANON 359 flags |= MAP_ANON; 360 #endif 361 #ifdef MAP_ANONYMOUS 362 flags |= MAP_ANONYMOUS; 363 #endif 364 fd = -1; 365 } 366 #ifdef MAP_FILE 367 if (!is_region || !is_anonymous) { 368 /* 369 * Historically, MAP_FILE was required for mapping regular 370 * files, even though it was the default. Some systems have 371 * it, some don't, some that have it set it to 0. 372 */ 373 flags |= MAP_FILE; 374 } 375 #endif 376 377 /* 378 * I know of no systems that implement the flag to tell the system 379 * that the region contains semaphores, but it's not an unreasonable 380 * thing to do, and has been part of the design since forever. I 381 * don't think anyone will object, but don't set it for read-only 382 * files, it doesn't make sense. 383 */ 384 #ifdef MAP_HASSEMAPHORE 385 if (!is_rdonly) 386 flags |= MAP_HASSEMAPHORE; 387 #endif 388 389 prot = PROT_READ | (is_rdonly ? 0 : PROT_WRITE); 390 391 /* 392 * XXX 393 * Work around a bug in the VMS V7.1 mmap() implementation. To map a file 394 * into memory on VMS it needs to be opened in a certain way, originally. 395 * To get the file opened in that certain way, the VMS mmap() closes the 396 * file and re-opens it. When it does this, it doesn't flush any caches 397 * out to disk before closing. The problem this causes us is that when the 398 * memory cache doesn't get written out, the file isn't big enough to match 399 * the memory chunk and the mmap() call fails. This call to fsync() fixes 400 * the problem. DEC thinks this isn't a bug because of language in XPG5 401 * discussing user responsibility for on-disk and in-memory synchronization. 402 */ 403 #ifdef VMS 404 if (__os_fsync(fd) == -1) 405 return(errno); 406 #endif 407 408 /* MAP_FAILED was not defined in early mmap implementations. */ 409 #ifndef MAP_FAILED 410 #define MAP_FAILED -1 411 #endif 412 if ((p = 413 mmap(NULL, len, prot, flags, fd, (off_t)0)) == (void *)MAP_FAILED) 414 return (errno); 415 416 *addr = p; 417 return (0); 418 } 419 #endif 420 421 #ifdef HAVE_SHMGET 422 /* 423 * __os_shmget -- 424 * Call the shmget(2) family of functions. 425 */ 426 static int 427 __os_shmget(infop) 428 REGINFO *infop; 429 { 430 if (F_ISSET(infop, REGION_CREATED) && 431 (infop->segid = shmget(0, infop->size, IPC_PRIVATE | 0600)) == -1) 432 return (errno); 433 434 if ((infop->addr = shmat(infop->segid, NULL, 0)) == (void *)-1) { 435 /* 436 * If we're trying to join the region and failing, assume 437 * that there was a reboot and the region no longer exists. 438 */ 439 if (!F_ISSET(infop, REGION_CREATED)) 440 errno = EAGAIN; 441 return (errno); 442 } 443 444 F_SET(infop, REGION_HOLDINGSYS); 445 return (0); 446 } 447 #endif 448