1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause AND BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 1994 Adam Glass and Charles Hannum. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by Adam Glass and Charles 17 * Hannum. 18 * 4. The names of the authors may not be used to endorse or promote products 19 * derived from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 * 32 * $NetBSD: sysv_shm.c,v 1.39 1997/10/07 10:02:03 drochner Exp $ 33 */ 34 /*- 35 * Copyright (c) 2003-2005 McAfee, Inc. 36 * Copyright (c) 2016-2017 Robert N. M. Watson 37 * All rights reserved. 38 * 39 * This software was developed for the FreeBSD Project in part by McAfee 40 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR 41 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research 42 * program. 43 * 44 * Portions of this software were developed by BAE Systems, the University of 45 * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL 46 * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent 47 * Computing (TC) research program. 48 * 49 * Redistribution and use in source and binary forms, with or without 50 * modification, are permitted provided that the following conditions 51 * are met: 52 * 1. Redistributions of source code must retain the above copyright 53 * notice, this list of conditions and the following disclaimer. 54 * 2. Redistributions in binary form must reproduce the above copyright 55 * notice, this list of conditions and the following disclaimer in the 56 * documentation and/or other materials provided with the distribution. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 */ 70 71 #include <sys/cdefs.h> 72 __FBSDID("$FreeBSD$"); 73 74 #include "opt_sysvipc.h" 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/kernel.h> 79 #include <sys/limits.h> 80 #include <sys/lock.h> 81 #include <sys/sysctl.h> 82 #include <sys/shm.h> 83 #include <sys/proc.h> 84 #include <sys/malloc.h> 85 #include <sys/mman.h> 86 #include <sys/module.h> 87 #include <sys/mutex.h> 88 #include <sys/racct.h> 89 #include <sys/resourcevar.h> 90 #include <sys/rwlock.h> 91 #include <sys/stat.h> 92 #include <sys/syscall.h> 93 #include <sys/syscallsubr.h> 94 #include <sys/sysent.h> 95 #include <sys/sysproto.h> 96 #include <sys/jail.h> 97 98 #include <security/audit/audit.h> 99 #include <security/mac/mac_framework.h> 100 101 #include <vm/vm.h> 102 #include <vm/vm_param.h> 103 #include <vm/pmap.h> 104 #include <vm/vm_object.h> 105 #include <vm/vm_map.h> 106 #include <vm/vm_page.h> 107 #include <vm/vm_pager.h> 108 109 FEATURE(sysv_shm, "System V shared memory segments support"); 110 111 static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments"); 112 113 static int shmget_allocate_segment(struct thread *td, 114 struct shmget_args *uap, int mode); 115 static int shmget_existing(struct thread *td, struct shmget_args *uap, 116 int mode, int segnum); 117 118 #define SHMSEG_FREE 0x0200 119 #define SHMSEG_REMOVED 0x0400 120 #define SHMSEG_ALLOCATED 0x0800 121 122 static int shm_last_free, shm_nused, shmalloced; 123 vm_size_t shm_committed; 124 static struct shmid_kernel *shmsegs; 125 static unsigned shm_prison_slot; 126 127 struct shmmap_state { 128 vm_offset_t va; 129 int shmid; 130 }; 131 132 static void shm_deallocate_segment(struct shmid_kernel *); 133 static int shm_find_segment_by_key(struct prison *, key_t); 134 static struct shmid_kernel *shm_find_segment(struct prison *, int, bool); 135 static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *); 136 static void shmrealloc(void); 137 static int shminit(void); 138 static int sysvshm_modload(struct module *, int, void *); 139 static int shmunload(void); 140 static void shmexit_myhook(struct vmspace *vm); 141 static void shmfork_myhook(struct proc *p1, struct proc *p2); 142 static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS); 143 static void shm_remove(struct shmid_kernel *, int); 144 static struct prison *shm_find_prison(struct ucred *); 145 static int shm_prison_cansee(struct prison *, struct shmid_kernel *); 146 static int shm_prison_check(void *, void *); 147 static int shm_prison_set(void *, void *); 148 static int shm_prison_get(void *, void *); 149 static int shm_prison_remove(void *, void *); 150 static void shm_prison_cleanup(struct prison *); 151 152 /* 153 * Tuneable values. 154 */ 155 #ifndef SHMMAXPGS 156 #define SHMMAXPGS 131072 /* Note: sysv shared memory is swap backed. */ 157 #endif 158 #ifndef SHMMAX 159 #define SHMMAX (SHMMAXPGS*PAGE_SIZE) 160 #endif 161 #ifndef SHMMIN 162 #define SHMMIN 1 163 #endif 164 #ifndef SHMMNI 165 #define SHMMNI 192 166 #endif 167 #ifndef SHMSEG 168 #define SHMSEG 128 169 #endif 170 #ifndef SHMALL 171 #define SHMALL (SHMMAXPGS) 172 #endif 173 174 struct shminfo shminfo = { 175 .shmmax = SHMMAX, 176 .shmmin = SHMMIN, 177 .shmmni = SHMMNI, 178 .shmseg = SHMSEG, 179 .shmall = SHMALL 180 }; 181 182 static int shm_use_phys; 183 static int shm_allow_removed = 1; 184 185 SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RWTUN, &shminfo.shmmax, 0, 186 "Maximum shared memory segment size"); 187 SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RWTUN, &shminfo.shmmin, 0, 188 "Minimum shared memory segment size"); 189 SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RDTUN, &shminfo.shmmni, 0, 190 "Number of shared memory identifiers"); 191 SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RDTUN, &shminfo.shmseg, 0, 192 "Number of segments per process"); 193 SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RWTUN, &shminfo.shmall, 0, 194 "Maximum number of pages available for shared memory"); 195 SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RWTUN, 196 &shm_use_phys, 0, "Enable/Disable locking of shared memory pages in core"); 197 SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RWTUN, 198 &shm_allow_removed, 0, 199 "Enable/Disable attachment to attached segments marked for removal"); 200 SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLTYPE_OPAQUE | CTLFLAG_RD | 201 CTLFLAG_MPSAFE, NULL, 0, sysctl_shmsegs, "", 202 "Array of struct shmid_kernel for each potential shared memory segment"); 203 204 static struct sx sysvshmsx; 205 #define SYSVSHM_LOCK() sx_xlock(&sysvshmsx) 206 #define SYSVSHM_UNLOCK() sx_xunlock(&sysvshmsx) 207 #define SYSVSHM_ASSERT_LOCKED() sx_assert(&sysvshmsx, SA_XLOCKED) 208 209 static int 210 shm_find_segment_by_key(struct prison *pr, key_t key) 211 { 212 int i; 213 214 for (i = 0; i < shmalloced; i++) 215 if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) && 216 shmsegs[i].cred != NULL && 217 shmsegs[i].cred->cr_prison == pr && 218 shmsegs[i].u.shm_perm.key == key) 219 return (i); 220 return (-1); 221 } 222 223 /* 224 * Finds segment either by shmid if is_shmid is true, or by segnum if 225 * is_shmid is false. 226 */ 227 static struct shmid_kernel * 228 shm_find_segment(struct prison *rpr, int arg, bool is_shmid) 229 { 230 struct shmid_kernel *shmseg; 231 int segnum; 232 233 segnum = is_shmid ? IPCID_TO_IX(arg) : arg; 234 if (segnum < 0 || segnum >= shmalloced) 235 return (NULL); 236 shmseg = &shmsegs[segnum]; 237 if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 || 238 (!shm_allow_removed && 239 (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0) || 240 (is_shmid && shmseg->u.shm_perm.seq != IPCID_TO_SEQ(arg)) || 241 shm_prison_cansee(rpr, shmseg) != 0) 242 return (NULL); 243 return (shmseg); 244 } 245 246 static void 247 shm_deallocate_segment(struct shmid_kernel *shmseg) 248 { 249 vm_size_t size; 250 251 SYSVSHM_ASSERT_LOCKED(); 252 253 vm_object_deallocate(shmseg->object); 254 shmseg->object = NULL; 255 size = round_page(shmseg->u.shm_segsz); 256 shm_committed -= btoc(size); 257 shm_nused--; 258 shmseg->u.shm_perm.mode = SHMSEG_FREE; 259 #ifdef MAC 260 mac_sysvshm_cleanup(shmseg); 261 #endif 262 racct_sub_cred(shmseg->cred, RACCT_NSHM, 1); 263 racct_sub_cred(shmseg->cred, RACCT_SHMSIZE, size); 264 crfree(shmseg->cred); 265 shmseg->cred = NULL; 266 } 267 268 static int 269 shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s) 270 { 271 struct shmid_kernel *shmseg; 272 int segnum, result; 273 vm_size_t size; 274 275 SYSVSHM_ASSERT_LOCKED(); 276 segnum = IPCID_TO_IX(shmmap_s->shmid); 277 KASSERT(segnum >= 0 && segnum < shmalloced, 278 ("segnum %d shmalloced %d", segnum, shmalloced)); 279 280 shmseg = &shmsegs[segnum]; 281 size = round_page(shmseg->u.shm_segsz); 282 result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size); 283 if (result != KERN_SUCCESS) 284 return (EINVAL); 285 shmmap_s->shmid = -1; 286 shmseg->u.shm_dtime = time_second; 287 if (--shmseg->u.shm_nattch == 0 && 288 (shmseg->u.shm_perm.mode & SHMSEG_REMOVED)) { 289 shm_deallocate_segment(shmseg); 290 shm_last_free = segnum; 291 } 292 return (0); 293 } 294 295 static void 296 shm_remove(struct shmid_kernel *shmseg, int segnum) 297 { 298 299 shmseg->u.shm_perm.key = IPC_PRIVATE; 300 shmseg->u.shm_perm.mode |= SHMSEG_REMOVED; 301 if (shmseg->u.shm_nattch == 0) { 302 shm_deallocate_segment(shmseg); 303 shm_last_free = segnum; 304 } 305 } 306 307 static struct prison * 308 shm_find_prison(struct ucred *cred) 309 { 310 struct prison *pr, *rpr; 311 312 pr = cred->cr_prison; 313 prison_lock(pr); 314 rpr = osd_jail_get(pr, shm_prison_slot); 315 prison_unlock(pr); 316 return rpr; 317 } 318 319 static int 320 shm_prison_cansee(struct prison *rpr, struct shmid_kernel *shmseg) 321 { 322 323 if (shmseg->cred == NULL || 324 !(rpr == shmseg->cred->cr_prison || 325 prison_ischild(rpr, shmseg->cred->cr_prison))) 326 return (EINVAL); 327 return (0); 328 } 329 330 static int 331 kern_shmdt_locked(struct thread *td, const void *shmaddr) 332 { 333 struct proc *p = td->td_proc; 334 struct shmmap_state *shmmap_s; 335 #ifdef MAC 336 int error; 337 #endif 338 int i; 339 340 SYSVSHM_ASSERT_LOCKED(); 341 if (shm_find_prison(td->td_ucred) == NULL) 342 return (ENOSYS); 343 shmmap_s = p->p_vmspace->vm_shm; 344 if (shmmap_s == NULL) 345 return (EINVAL); 346 AUDIT_ARG_SVIPC_ID(shmmap_s->shmid); 347 for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) { 348 if (shmmap_s->shmid != -1 && 349 shmmap_s->va == (vm_offset_t)shmaddr) { 350 break; 351 } 352 } 353 if (i == shminfo.shmseg) 354 return (EINVAL); 355 #ifdef MAC 356 error = mac_sysvshm_check_shmdt(td->td_ucred, 357 &shmsegs[IPCID_TO_IX(shmmap_s->shmid)]); 358 if (error != 0) 359 return (error); 360 #endif 361 return (shm_delete_mapping(p->p_vmspace, shmmap_s)); 362 } 363 364 #ifndef _SYS_SYSPROTO_H_ 365 struct shmdt_args { 366 const void *shmaddr; 367 }; 368 #endif 369 int 370 sys_shmdt(struct thread *td, struct shmdt_args *uap) 371 { 372 int error; 373 374 SYSVSHM_LOCK(); 375 error = kern_shmdt_locked(td, uap->shmaddr); 376 SYSVSHM_UNLOCK(); 377 return (error); 378 } 379 380 static int 381 kern_shmat_locked(struct thread *td, int shmid, const void *shmaddr, 382 int shmflg) 383 { 384 struct prison *rpr; 385 struct proc *p = td->td_proc; 386 struct shmid_kernel *shmseg; 387 struct shmmap_state *shmmap_s; 388 vm_offset_t attach_va; 389 vm_prot_t prot; 390 vm_size_t size; 391 int error, i, rv; 392 393 AUDIT_ARG_SVIPC_ID(shmid); 394 AUDIT_ARG_VALUE(shmflg); 395 396 SYSVSHM_ASSERT_LOCKED(); 397 rpr = shm_find_prison(td->td_ucred); 398 if (rpr == NULL) 399 return (ENOSYS); 400 shmmap_s = p->p_vmspace->vm_shm; 401 if (shmmap_s == NULL) { 402 shmmap_s = malloc(shminfo.shmseg * sizeof(struct shmmap_state), 403 M_SHM, M_WAITOK); 404 for (i = 0; i < shminfo.shmseg; i++) 405 shmmap_s[i].shmid = -1; 406 KASSERT(p->p_vmspace->vm_shm == NULL, ("raced")); 407 p->p_vmspace->vm_shm = shmmap_s; 408 } 409 shmseg = shm_find_segment(rpr, shmid, true); 410 if (shmseg == NULL) 411 return (EINVAL); 412 error = ipcperm(td, &shmseg->u.shm_perm, 413 (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W); 414 if (error != 0) 415 return (error); 416 #ifdef MAC 417 error = mac_sysvshm_check_shmat(td->td_ucred, shmseg, shmflg); 418 if (error != 0) 419 return (error); 420 #endif 421 for (i = 0; i < shminfo.shmseg; i++) { 422 if (shmmap_s->shmid == -1) 423 break; 424 shmmap_s++; 425 } 426 if (i >= shminfo.shmseg) 427 return (EMFILE); 428 size = round_page(shmseg->u.shm_segsz); 429 prot = VM_PROT_READ; 430 if ((shmflg & SHM_RDONLY) == 0) 431 prot |= VM_PROT_WRITE; 432 if (shmaddr != NULL) { 433 if ((shmflg & SHM_RND) != 0) 434 attach_va = rounddown2((vm_offset_t)shmaddr, SHMLBA); 435 else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0) 436 attach_va = (vm_offset_t)shmaddr; 437 else 438 return (EINVAL); 439 } else { 440 /* 441 * This is just a hint to vm_map_find() about where to 442 * put it. 443 */ 444 attach_va = round_page((vm_offset_t)p->p_vmspace->vm_daddr + 445 lim_max(td, RLIMIT_DATA)); 446 } 447 448 vm_object_reference(shmseg->object); 449 rv = vm_map_find(&p->p_vmspace->vm_map, shmseg->object, 0, &attach_va, 450 size, 0, shmaddr != NULL ? VMFS_NO_SPACE : VMFS_OPTIMAL_SPACE, 451 prot, prot, MAP_INHERIT_SHARE | MAP_PREFAULT_PARTIAL); 452 if (rv != KERN_SUCCESS) { 453 vm_object_deallocate(shmseg->object); 454 return (ENOMEM); 455 } 456 457 shmmap_s->va = attach_va; 458 shmmap_s->shmid = shmid; 459 shmseg->u.shm_lpid = p->p_pid; 460 shmseg->u.shm_atime = time_second; 461 shmseg->u.shm_nattch++; 462 td->td_retval[0] = attach_va; 463 return (error); 464 } 465 466 int 467 kern_shmat(struct thread *td, int shmid, const void *shmaddr, int shmflg) 468 { 469 int error; 470 471 SYSVSHM_LOCK(); 472 error = kern_shmat_locked(td, shmid, shmaddr, shmflg); 473 SYSVSHM_UNLOCK(); 474 return (error); 475 } 476 477 #ifndef _SYS_SYSPROTO_H_ 478 struct shmat_args { 479 int shmid; 480 const void *shmaddr; 481 int shmflg; 482 }; 483 #endif 484 int 485 sys_shmat(struct thread *td, struct shmat_args *uap) 486 { 487 488 return (kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg)); 489 } 490 491 static int 492 kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf, 493 size_t *bufsz) 494 { 495 struct prison *rpr; 496 struct shmid_kernel *shmseg; 497 struct shmid_ds *shmidp; 498 struct shm_info shm_info; 499 int error; 500 501 SYSVSHM_ASSERT_LOCKED(); 502 503 rpr = shm_find_prison(td->td_ucred); 504 if (rpr == NULL) 505 return (ENOSYS); 506 507 AUDIT_ARG_SVIPC_ID(shmid); 508 AUDIT_ARG_SVIPC_CMD(cmd); 509 510 switch (cmd) { 511 /* 512 * It is possible that kern_shmctl is being called from the Linux ABI 513 * layer, in which case, we will need to implement IPC_INFO. It should 514 * be noted that other shmctl calls will be funneled through here for 515 * Linix binaries as well. 516 * 517 * NB: The Linux ABI layer will convert this data to structure(s) more 518 * consistent with the Linux ABI. 519 */ 520 case IPC_INFO: 521 memcpy(buf, &shminfo, sizeof(shminfo)); 522 if (bufsz) 523 *bufsz = sizeof(shminfo); 524 td->td_retval[0] = shmalloced; 525 return (0); 526 case SHM_INFO: { 527 shm_info.used_ids = shm_nused; 528 shm_info.shm_rss = 0; /*XXX where to get from ? */ 529 shm_info.shm_tot = 0; /*XXX where to get from ? */ 530 shm_info.shm_swp = 0; /*XXX where to get from ? */ 531 shm_info.swap_attempts = 0; /*XXX where to get from ? */ 532 shm_info.swap_successes = 0; /*XXX where to get from ? */ 533 memcpy(buf, &shm_info, sizeof(shm_info)); 534 if (bufsz != NULL) 535 *bufsz = sizeof(shm_info); 536 td->td_retval[0] = shmalloced; 537 return (0); 538 } 539 } 540 shmseg = shm_find_segment(rpr, shmid, cmd != SHM_STAT); 541 if (shmseg == NULL) 542 return (EINVAL); 543 #ifdef MAC 544 error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, cmd); 545 if (error != 0) 546 return (error); 547 #endif 548 switch (cmd) { 549 case SHM_STAT: 550 case IPC_STAT: 551 shmidp = (struct shmid_ds *)buf; 552 error = ipcperm(td, &shmseg->u.shm_perm, IPC_R); 553 if (error != 0) 554 return (error); 555 memcpy(shmidp, &shmseg->u, sizeof(struct shmid_ds)); 556 if (td->td_ucred->cr_prison != shmseg->cred->cr_prison) 557 shmidp->shm_perm.key = IPC_PRIVATE; 558 if (bufsz != NULL) 559 *bufsz = sizeof(struct shmid_ds); 560 if (cmd == SHM_STAT) { 561 td->td_retval[0] = IXSEQ_TO_IPCID(shmid, 562 shmseg->u.shm_perm); 563 } 564 break; 565 case IPC_SET: 566 shmidp = (struct shmid_ds *)buf; 567 AUDIT_ARG_SVIPC_PERM(&shmidp->shm_perm); 568 error = ipcperm(td, &shmseg->u.shm_perm, IPC_M); 569 if (error != 0) 570 return (error); 571 shmseg->u.shm_perm.uid = shmidp->shm_perm.uid; 572 shmseg->u.shm_perm.gid = shmidp->shm_perm.gid; 573 shmseg->u.shm_perm.mode = 574 (shmseg->u.shm_perm.mode & ~ACCESSPERMS) | 575 (shmidp->shm_perm.mode & ACCESSPERMS); 576 shmseg->u.shm_ctime = time_second; 577 break; 578 case IPC_RMID: 579 error = ipcperm(td, &shmseg->u.shm_perm, IPC_M); 580 if (error != 0) 581 return (error); 582 shm_remove(shmseg, IPCID_TO_IX(shmid)); 583 break; 584 #if 0 585 case SHM_LOCK: 586 case SHM_UNLOCK: 587 #endif 588 default: 589 error = EINVAL; 590 break; 591 } 592 return (error); 593 } 594 595 int 596 kern_shmctl(struct thread *td, int shmid, int cmd, void *buf, size_t *bufsz) 597 { 598 int error; 599 600 SYSVSHM_LOCK(); 601 error = kern_shmctl_locked(td, shmid, cmd, buf, bufsz); 602 SYSVSHM_UNLOCK(); 603 return (error); 604 } 605 606 607 #ifndef _SYS_SYSPROTO_H_ 608 struct shmctl_args { 609 int shmid; 610 int cmd; 611 struct shmid_ds *buf; 612 }; 613 #endif 614 int 615 sys_shmctl(struct thread *td, struct shmctl_args *uap) 616 { 617 int error; 618 struct shmid_ds buf; 619 size_t bufsz; 620 621 /* 622 * The only reason IPC_INFO, SHM_INFO, SHM_STAT exists is to support 623 * Linux binaries. If we see the call come through the FreeBSD ABI, 624 * return an error back to the user since we do not to support this. 625 */ 626 if (uap->cmd == IPC_INFO || uap->cmd == SHM_INFO || 627 uap->cmd == SHM_STAT) 628 return (EINVAL); 629 630 /* IPC_SET needs to copyin the buffer before calling kern_shmctl */ 631 if (uap->cmd == IPC_SET) { 632 if ((error = copyin(uap->buf, &buf, sizeof(struct shmid_ds)))) 633 goto done; 634 } 635 636 error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz); 637 if (error) 638 goto done; 639 640 /* Cases in which we need to copyout */ 641 switch (uap->cmd) { 642 case IPC_STAT: 643 error = copyout(&buf, uap->buf, bufsz); 644 break; 645 } 646 647 done: 648 if (error) { 649 /* Invalidate the return value */ 650 td->td_retval[0] = -1; 651 } 652 return (error); 653 } 654 655 656 static int 657 shmget_existing(struct thread *td, struct shmget_args *uap, int mode, 658 int segnum) 659 { 660 struct shmid_kernel *shmseg; 661 #ifdef MAC 662 int error; 663 #endif 664 665 SYSVSHM_ASSERT_LOCKED(); 666 KASSERT(segnum >= 0 && segnum < shmalloced, 667 ("segnum %d shmalloced %d", segnum, shmalloced)); 668 shmseg = &shmsegs[segnum]; 669 if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL)) 670 return (EEXIST); 671 #ifdef MAC 672 error = mac_sysvshm_check_shmget(td->td_ucred, shmseg, uap->shmflg); 673 if (error != 0) 674 return (error); 675 #endif 676 if (uap->size != 0 && uap->size > shmseg->u.shm_segsz) 677 return (EINVAL); 678 td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm); 679 return (0); 680 } 681 682 static int 683 shmget_allocate_segment(struct thread *td, struct shmget_args *uap, int mode) 684 { 685 struct ucred *cred = td->td_ucred; 686 struct shmid_kernel *shmseg; 687 vm_object_t shm_object; 688 int i, segnum; 689 size_t size; 690 691 SYSVSHM_ASSERT_LOCKED(); 692 693 if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax) 694 return (EINVAL); 695 if (shm_nused >= shminfo.shmmni) /* Any shmids left? */ 696 return (ENOSPC); 697 size = round_page(uap->size); 698 if (shm_committed + btoc(size) > shminfo.shmall) 699 return (ENOMEM); 700 if (shm_last_free < 0) { 701 shmrealloc(); /* Maybe expand the shmsegs[] array. */ 702 for (i = 0; i < shmalloced; i++) 703 if (shmsegs[i].u.shm_perm.mode & SHMSEG_FREE) 704 break; 705 if (i == shmalloced) 706 return (ENOSPC); 707 segnum = i; 708 } else { 709 segnum = shm_last_free; 710 shm_last_free = -1; 711 } 712 KASSERT(segnum >= 0 && segnum < shmalloced, 713 ("segnum %d shmalloced %d", segnum, shmalloced)); 714 shmseg = &shmsegs[segnum]; 715 #ifdef RACCT 716 if (racct_enable) { 717 PROC_LOCK(td->td_proc); 718 if (racct_add(td->td_proc, RACCT_NSHM, 1)) { 719 PROC_UNLOCK(td->td_proc); 720 return (ENOSPC); 721 } 722 if (racct_add(td->td_proc, RACCT_SHMSIZE, size)) { 723 racct_sub(td->td_proc, RACCT_NSHM, 1); 724 PROC_UNLOCK(td->td_proc); 725 return (ENOMEM); 726 } 727 PROC_UNLOCK(td->td_proc); 728 } 729 #endif 730 731 /* 732 * We make sure that we have allocated a pager before we need 733 * to. 734 */ 735 shm_object = vm_pager_allocate(shm_use_phys ? OBJT_PHYS : OBJT_SWAP, 736 0, size, VM_PROT_DEFAULT, 0, cred); 737 if (shm_object == NULL) { 738 #ifdef RACCT 739 if (racct_enable) { 740 PROC_LOCK(td->td_proc); 741 racct_sub(td->td_proc, RACCT_NSHM, 1); 742 racct_sub(td->td_proc, RACCT_SHMSIZE, size); 743 PROC_UNLOCK(td->td_proc); 744 } 745 #endif 746 return (ENOMEM); 747 } 748 shm_object->pg_color = 0; 749 VM_OBJECT_WLOCK(shm_object); 750 vm_object_clear_flag(shm_object, OBJ_ONEMAPPING); 751 vm_object_set_flag(shm_object, OBJ_COLORED | OBJ_NOSPLIT); 752 VM_OBJECT_WUNLOCK(shm_object); 753 754 shmseg->object = shm_object; 755 shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = cred->cr_uid; 756 shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = cred->cr_gid; 757 shmseg->u.shm_perm.mode = (mode & ACCESSPERMS) | SHMSEG_ALLOCATED; 758 shmseg->u.shm_perm.key = uap->key; 759 shmseg->u.shm_perm.seq = (shmseg->u.shm_perm.seq + 1) & 0x7fff; 760 shmseg->cred = crhold(cred); 761 shmseg->u.shm_segsz = uap->size; 762 shmseg->u.shm_cpid = td->td_proc->p_pid; 763 shmseg->u.shm_lpid = shmseg->u.shm_nattch = 0; 764 shmseg->u.shm_atime = shmseg->u.shm_dtime = 0; 765 #ifdef MAC 766 mac_sysvshm_create(cred, shmseg); 767 #endif 768 shmseg->u.shm_ctime = time_second; 769 shm_committed += btoc(size); 770 shm_nused++; 771 td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm); 772 773 return (0); 774 } 775 776 #ifndef _SYS_SYSPROTO_H_ 777 struct shmget_args { 778 key_t key; 779 size_t size; 780 int shmflg; 781 }; 782 #endif 783 int 784 sys_shmget(struct thread *td, struct shmget_args *uap) 785 { 786 int segnum, mode; 787 int error; 788 789 if (shm_find_prison(td->td_ucred) == NULL) 790 return (ENOSYS); 791 mode = uap->shmflg & ACCESSPERMS; 792 SYSVSHM_LOCK(); 793 if (uap->key == IPC_PRIVATE) { 794 error = shmget_allocate_segment(td, uap, mode); 795 } else { 796 segnum = shm_find_segment_by_key(td->td_ucred->cr_prison, 797 uap->key); 798 if (segnum >= 0) 799 error = shmget_existing(td, uap, mode, segnum); 800 else if ((uap->shmflg & IPC_CREAT) == 0) 801 error = ENOENT; 802 else 803 error = shmget_allocate_segment(td, uap, mode); 804 } 805 SYSVSHM_UNLOCK(); 806 return (error); 807 } 808 809 static void 810 shmfork_myhook(struct proc *p1, struct proc *p2) 811 { 812 struct shmmap_state *shmmap_s; 813 size_t size; 814 int i; 815 816 SYSVSHM_LOCK(); 817 size = shminfo.shmseg * sizeof(struct shmmap_state); 818 shmmap_s = malloc(size, M_SHM, M_WAITOK); 819 bcopy(p1->p_vmspace->vm_shm, shmmap_s, size); 820 p2->p_vmspace->vm_shm = shmmap_s; 821 for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) { 822 if (shmmap_s->shmid != -1) { 823 KASSERT(IPCID_TO_IX(shmmap_s->shmid) >= 0 && 824 IPCID_TO_IX(shmmap_s->shmid) < shmalloced, 825 ("segnum %d shmalloced %d", 826 IPCID_TO_IX(shmmap_s->shmid), shmalloced)); 827 shmsegs[IPCID_TO_IX(shmmap_s->shmid)].u.shm_nattch++; 828 } 829 } 830 SYSVSHM_UNLOCK(); 831 } 832 833 static void 834 shmexit_myhook(struct vmspace *vm) 835 { 836 struct shmmap_state *base, *shm; 837 int i; 838 839 base = vm->vm_shm; 840 if (base != NULL) { 841 vm->vm_shm = NULL; 842 SYSVSHM_LOCK(); 843 for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) { 844 if (shm->shmid != -1) 845 shm_delete_mapping(vm, shm); 846 } 847 SYSVSHM_UNLOCK(); 848 free(base, M_SHM); 849 } 850 } 851 852 static void 853 shmrealloc(void) 854 { 855 struct shmid_kernel *newsegs; 856 int i; 857 858 SYSVSHM_ASSERT_LOCKED(); 859 860 if (shmalloced >= shminfo.shmmni) 861 return; 862 863 newsegs = malloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, 864 M_WAITOK | M_ZERO); 865 for (i = 0; i < shmalloced; i++) 866 bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0])); 867 for (; i < shminfo.shmmni; i++) { 868 newsegs[i].u.shm_perm.mode = SHMSEG_FREE; 869 newsegs[i].u.shm_perm.seq = 0; 870 #ifdef MAC 871 mac_sysvshm_init(&newsegs[i]); 872 #endif 873 } 874 free(shmsegs, M_SHM); 875 shmsegs = newsegs; 876 shmalloced = shminfo.shmmni; 877 } 878 879 static struct syscall_helper_data shm_syscalls[] = { 880 SYSCALL_INIT_HELPER(shmat), 881 SYSCALL_INIT_HELPER(shmctl), 882 SYSCALL_INIT_HELPER(shmdt), 883 SYSCALL_INIT_HELPER(shmget), 884 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 885 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 886 SYSCALL_INIT_HELPER_COMPAT(freebsd7_shmctl), 887 #endif 888 #if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43)) 889 SYSCALL_INIT_HELPER(shmsys), 890 #endif 891 SYSCALL_INIT_LAST 892 }; 893 894 #ifdef COMPAT_FREEBSD32 895 #include <compat/freebsd32/freebsd32.h> 896 #include <compat/freebsd32/freebsd32_ipc.h> 897 #include <compat/freebsd32/freebsd32_proto.h> 898 #include <compat/freebsd32/freebsd32_signal.h> 899 #include <compat/freebsd32/freebsd32_syscall.h> 900 #include <compat/freebsd32/freebsd32_util.h> 901 902 static struct syscall_helper_data shm32_syscalls[] = { 903 SYSCALL32_INIT_HELPER_COMPAT(shmat), 904 SYSCALL32_INIT_HELPER_COMPAT(shmdt), 905 SYSCALL32_INIT_HELPER_COMPAT(shmget), 906 SYSCALL32_INIT_HELPER(freebsd32_shmsys), 907 SYSCALL32_INIT_HELPER(freebsd32_shmctl), 908 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 909 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 910 SYSCALL32_INIT_HELPER(freebsd7_freebsd32_shmctl), 911 #endif 912 SYSCALL_INIT_LAST 913 }; 914 #endif 915 916 static int 917 shminit(void) 918 { 919 struct prison *pr; 920 void **rsv; 921 int i, error; 922 osd_method_t methods[PR_MAXMETHOD] = { 923 [PR_METHOD_CHECK] = shm_prison_check, 924 [PR_METHOD_SET] = shm_prison_set, 925 [PR_METHOD_GET] = shm_prison_get, 926 [PR_METHOD_REMOVE] = shm_prison_remove, 927 }; 928 929 #ifndef BURN_BRIDGES 930 if (TUNABLE_ULONG_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall) != 0) 931 printf("kern.ipc.shmmaxpgs is now called kern.ipc.shmall!\n"); 932 #endif 933 if (shminfo.shmmax == SHMMAX) { 934 /* Initialize shmmax dealing with possible overflow. */ 935 for (i = PAGE_SIZE; i != 0; i--) { 936 shminfo.shmmax = shminfo.shmall * i; 937 if ((shminfo.shmmax / shminfo.shmall) == (u_long)i) 938 break; 939 } 940 } 941 shmalloced = shminfo.shmmni; 942 shmsegs = malloc(shmalloced * sizeof(shmsegs[0]), M_SHM, 943 M_WAITOK|M_ZERO); 944 for (i = 0; i < shmalloced; i++) { 945 shmsegs[i].u.shm_perm.mode = SHMSEG_FREE; 946 shmsegs[i].u.shm_perm.seq = 0; 947 #ifdef MAC 948 mac_sysvshm_init(&shmsegs[i]); 949 #endif 950 } 951 shm_last_free = 0; 952 shm_nused = 0; 953 shm_committed = 0; 954 sx_init(&sysvshmsx, "sysvshmsx"); 955 shmexit_hook = &shmexit_myhook; 956 shmfork_hook = &shmfork_myhook; 957 958 /* Set current prisons according to their allow.sysvipc. */ 959 shm_prison_slot = osd_jail_register(NULL, methods); 960 rsv = osd_reserve(shm_prison_slot); 961 prison_lock(&prison0); 962 (void)osd_jail_set_reserved(&prison0, shm_prison_slot, rsv, &prison0); 963 prison_unlock(&prison0); 964 rsv = NULL; 965 sx_slock(&allprison_lock); 966 TAILQ_FOREACH(pr, &allprison, pr_list) { 967 if (rsv == NULL) 968 rsv = osd_reserve(shm_prison_slot); 969 prison_lock(pr); 970 if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) { 971 (void)osd_jail_set_reserved(pr, shm_prison_slot, rsv, 972 &prison0); 973 rsv = NULL; 974 } 975 prison_unlock(pr); 976 } 977 if (rsv != NULL) 978 osd_free_reserved(rsv); 979 sx_sunlock(&allprison_lock); 980 981 error = syscall_helper_register(shm_syscalls, SY_THR_STATIC_KLD); 982 if (error != 0) 983 return (error); 984 #ifdef COMPAT_FREEBSD32 985 error = syscall32_helper_register(shm32_syscalls, SY_THR_STATIC_KLD); 986 if (error != 0) 987 return (error); 988 #endif 989 return (0); 990 } 991 992 static int 993 shmunload(void) 994 { 995 int i; 996 997 if (shm_nused > 0) 998 return (EBUSY); 999 1000 #ifdef COMPAT_FREEBSD32 1001 syscall32_helper_unregister(shm32_syscalls); 1002 #endif 1003 syscall_helper_unregister(shm_syscalls); 1004 if (shm_prison_slot != 0) 1005 osd_jail_deregister(shm_prison_slot); 1006 1007 for (i = 0; i < shmalloced; i++) { 1008 #ifdef MAC 1009 mac_sysvshm_destroy(&shmsegs[i]); 1010 #endif 1011 /* 1012 * Objects might be still mapped into the processes 1013 * address spaces. Actual free would happen on the 1014 * last mapping destruction. 1015 */ 1016 if (shmsegs[i].u.shm_perm.mode != SHMSEG_FREE) 1017 vm_object_deallocate(shmsegs[i].object); 1018 } 1019 free(shmsegs, M_SHM); 1020 shmexit_hook = NULL; 1021 shmfork_hook = NULL; 1022 sx_destroy(&sysvshmsx); 1023 return (0); 1024 } 1025 1026 static int 1027 sysctl_shmsegs(SYSCTL_HANDLER_ARGS) 1028 { 1029 struct shmid_kernel tshmseg; 1030 #ifdef COMPAT_FREEBSD32 1031 struct shmid_kernel32 tshmseg32; 1032 #endif 1033 struct prison *pr, *rpr; 1034 void *outaddr; 1035 size_t outsize; 1036 int error, i; 1037 1038 SYSVSHM_LOCK(); 1039 pr = req->td->td_ucred->cr_prison; 1040 rpr = shm_find_prison(req->td->td_ucred); 1041 error = 0; 1042 for (i = 0; i < shmalloced; i++) { 1043 if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 || 1044 rpr == NULL || shm_prison_cansee(rpr, &shmsegs[i]) != 0) { 1045 bzero(&tshmseg, sizeof(tshmseg)); 1046 tshmseg.u.shm_perm.mode = SHMSEG_FREE; 1047 } else { 1048 tshmseg = shmsegs[i]; 1049 if (tshmseg.cred->cr_prison != pr) 1050 tshmseg.u.shm_perm.key = IPC_PRIVATE; 1051 } 1052 #ifdef COMPAT_FREEBSD32 1053 if (SV_CURPROC_FLAG(SV_ILP32)) { 1054 bzero(&tshmseg32, sizeof(tshmseg32)); 1055 freebsd32_ipcperm_out(&tshmseg.u.shm_perm, 1056 &tshmseg32.u.shm_perm); 1057 CP(tshmseg, tshmseg32, u.shm_segsz); 1058 CP(tshmseg, tshmseg32, u.shm_lpid); 1059 CP(tshmseg, tshmseg32, u.shm_cpid); 1060 CP(tshmseg, tshmseg32, u.shm_nattch); 1061 CP(tshmseg, tshmseg32, u.shm_atime); 1062 CP(tshmseg, tshmseg32, u.shm_dtime); 1063 CP(tshmseg, tshmseg32, u.shm_ctime); 1064 /* Don't copy object, label, or cred */ 1065 outaddr = &tshmseg32; 1066 outsize = sizeof(tshmseg32); 1067 } else 1068 #endif 1069 { 1070 tshmseg.object = NULL; 1071 tshmseg.label = NULL; 1072 tshmseg.cred = NULL; 1073 outaddr = &tshmseg; 1074 outsize = sizeof(tshmseg); 1075 } 1076 error = SYSCTL_OUT(req, outaddr, outsize); 1077 if (error != 0) 1078 break; 1079 } 1080 SYSVSHM_UNLOCK(); 1081 return (error); 1082 } 1083 1084 static int 1085 shm_prison_check(void *obj, void *data) 1086 { 1087 struct prison *pr = obj; 1088 struct prison *prpr; 1089 struct vfsoptlist *opts = data; 1090 int error, jsys; 1091 1092 /* 1093 * sysvshm is a jailsys integer. 1094 * It must be "disable" if the parent jail is disabled. 1095 */ 1096 error = vfs_copyopt(opts, "sysvshm", &jsys, sizeof(jsys)); 1097 if (error != ENOENT) { 1098 if (error != 0) 1099 return (error); 1100 switch (jsys) { 1101 case JAIL_SYS_DISABLE: 1102 break; 1103 case JAIL_SYS_NEW: 1104 case JAIL_SYS_INHERIT: 1105 prison_lock(pr->pr_parent); 1106 prpr = osd_jail_get(pr->pr_parent, shm_prison_slot); 1107 prison_unlock(pr->pr_parent); 1108 if (prpr == NULL) 1109 return (EPERM); 1110 break; 1111 default: 1112 return (EINVAL); 1113 } 1114 } 1115 1116 return (0); 1117 } 1118 1119 static int 1120 shm_prison_set(void *obj, void *data) 1121 { 1122 struct prison *pr = obj; 1123 struct prison *tpr, *orpr, *nrpr, *trpr; 1124 struct vfsoptlist *opts = data; 1125 void *rsv; 1126 int jsys, descend; 1127 1128 /* 1129 * sysvshm controls which jail is the root of the associated segments 1130 * (this jail or same as the parent), or if the feature is available 1131 * at all. 1132 */ 1133 if (vfs_copyopt(opts, "sysvshm", &jsys, sizeof(jsys)) == ENOENT) 1134 jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0) 1135 ? JAIL_SYS_INHERIT 1136 : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0) 1137 ? JAIL_SYS_DISABLE 1138 : -1; 1139 if (jsys == JAIL_SYS_DISABLE) { 1140 prison_lock(pr); 1141 orpr = osd_jail_get(pr, shm_prison_slot); 1142 if (orpr != NULL) 1143 osd_jail_del(pr, shm_prison_slot); 1144 prison_unlock(pr); 1145 if (orpr != NULL) { 1146 if (orpr == pr) 1147 shm_prison_cleanup(pr); 1148 /* Disable all child jails as well. */ 1149 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { 1150 prison_lock(tpr); 1151 trpr = osd_jail_get(tpr, shm_prison_slot); 1152 if (trpr != NULL) { 1153 osd_jail_del(tpr, shm_prison_slot); 1154 prison_unlock(tpr); 1155 if (trpr == tpr) 1156 shm_prison_cleanup(tpr); 1157 } else { 1158 prison_unlock(tpr); 1159 descend = 0; 1160 } 1161 } 1162 } 1163 } else if (jsys != -1) { 1164 if (jsys == JAIL_SYS_NEW) 1165 nrpr = pr; 1166 else { 1167 prison_lock(pr->pr_parent); 1168 nrpr = osd_jail_get(pr->pr_parent, shm_prison_slot); 1169 prison_unlock(pr->pr_parent); 1170 } 1171 rsv = osd_reserve(shm_prison_slot); 1172 prison_lock(pr); 1173 orpr = osd_jail_get(pr, shm_prison_slot); 1174 if (orpr != nrpr) 1175 (void)osd_jail_set_reserved(pr, shm_prison_slot, rsv, 1176 nrpr); 1177 else 1178 osd_free_reserved(rsv); 1179 prison_unlock(pr); 1180 if (orpr != nrpr) { 1181 if (orpr == pr) 1182 shm_prison_cleanup(pr); 1183 if (orpr != NULL) { 1184 /* Change child jails matching the old root, */ 1185 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { 1186 prison_lock(tpr); 1187 trpr = osd_jail_get(tpr, 1188 shm_prison_slot); 1189 if (trpr == orpr) { 1190 (void)osd_jail_set(tpr, 1191 shm_prison_slot, nrpr); 1192 prison_unlock(tpr); 1193 if (trpr == tpr) 1194 shm_prison_cleanup(tpr); 1195 } else { 1196 prison_unlock(tpr); 1197 descend = 0; 1198 } 1199 } 1200 } 1201 } 1202 } 1203 1204 return (0); 1205 } 1206 1207 static int 1208 shm_prison_get(void *obj, void *data) 1209 { 1210 struct prison *pr = obj; 1211 struct prison *rpr; 1212 struct vfsoptlist *opts = data; 1213 int error, jsys; 1214 1215 /* Set sysvshm based on the jail's root prison. */ 1216 prison_lock(pr); 1217 rpr = osd_jail_get(pr, shm_prison_slot); 1218 prison_unlock(pr); 1219 jsys = rpr == NULL ? JAIL_SYS_DISABLE 1220 : rpr == pr ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 1221 error = vfs_setopt(opts, "sysvshm", &jsys, sizeof(jsys)); 1222 if (error == ENOENT) 1223 error = 0; 1224 return (error); 1225 } 1226 1227 static int 1228 shm_prison_remove(void *obj, void *data __unused) 1229 { 1230 struct prison *pr = obj; 1231 struct prison *rpr; 1232 1233 SYSVSHM_LOCK(); 1234 prison_lock(pr); 1235 rpr = osd_jail_get(pr, shm_prison_slot); 1236 prison_unlock(pr); 1237 if (rpr == pr) 1238 shm_prison_cleanup(pr); 1239 SYSVSHM_UNLOCK(); 1240 return (0); 1241 } 1242 1243 static void 1244 shm_prison_cleanup(struct prison *pr) 1245 { 1246 struct shmid_kernel *shmseg; 1247 int i; 1248 1249 /* Remove any segments that belong to this jail. */ 1250 for (i = 0; i < shmalloced; i++) { 1251 shmseg = &shmsegs[i]; 1252 if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) && 1253 shmseg->cred != NULL && shmseg->cred->cr_prison == pr) { 1254 shm_remove(shmseg, i); 1255 } 1256 } 1257 } 1258 1259 SYSCTL_JAIL_PARAM_SYS_NODE(sysvshm, CTLFLAG_RW, "SYSV shared memory"); 1260 1261 #if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43)) 1262 struct oshmid_ds { 1263 struct ipc_perm_old shm_perm; /* operation perms */ 1264 int shm_segsz; /* size of segment (bytes) */ 1265 u_short shm_cpid; /* pid, creator */ 1266 u_short shm_lpid; /* pid, last operation */ 1267 short shm_nattch; /* no. of current attaches */ 1268 time_t shm_atime; /* last attach time */ 1269 time_t shm_dtime; /* last detach time */ 1270 time_t shm_ctime; /* last change time */ 1271 void *shm_handle; /* internal handle for shm segment */ 1272 }; 1273 1274 struct oshmctl_args { 1275 int shmid; 1276 int cmd; 1277 struct oshmid_ds *ubuf; 1278 }; 1279 1280 static int 1281 oshmctl(struct thread *td, struct oshmctl_args *uap) 1282 { 1283 #ifdef COMPAT_43 1284 int error = 0; 1285 struct prison *rpr; 1286 struct shmid_kernel *shmseg; 1287 struct oshmid_ds outbuf; 1288 1289 rpr = shm_find_prison(td->td_ucred); 1290 if (rpr == NULL) 1291 return (ENOSYS); 1292 if (uap->cmd != IPC_STAT) { 1293 return (freebsd7_shmctl(td, 1294 (struct freebsd7_shmctl_args *)uap)); 1295 } 1296 SYSVSHM_LOCK(); 1297 shmseg = shm_find_segment(rpr, uap->shmid, true); 1298 if (shmseg == NULL) { 1299 SYSVSHM_UNLOCK(); 1300 return (EINVAL); 1301 } 1302 error = ipcperm(td, &shmseg->u.shm_perm, IPC_R); 1303 if (error != 0) { 1304 SYSVSHM_UNLOCK(); 1305 return (error); 1306 } 1307 #ifdef MAC 1308 error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, uap->cmd); 1309 if (error != 0) { 1310 SYSVSHM_UNLOCK(); 1311 return (error); 1312 } 1313 #endif 1314 ipcperm_new2old(&shmseg->u.shm_perm, &outbuf.shm_perm); 1315 outbuf.shm_segsz = shmseg->u.shm_segsz; 1316 outbuf.shm_cpid = shmseg->u.shm_cpid; 1317 outbuf.shm_lpid = shmseg->u.shm_lpid; 1318 outbuf.shm_nattch = shmseg->u.shm_nattch; 1319 outbuf.shm_atime = shmseg->u.shm_atime; 1320 outbuf.shm_dtime = shmseg->u.shm_dtime; 1321 outbuf.shm_ctime = shmseg->u.shm_ctime; 1322 outbuf.shm_handle = shmseg->object; 1323 SYSVSHM_UNLOCK(); 1324 return (copyout(&outbuf, uap->ubuf, sizeof(outbuf))); 1325 #else 1326 return (EINVAL); 1327 #endif 1328 } 1329 1330 /* XXX casting to (sy_call_t *) is bogus, as usual. */ 1331 static sy_call_t *shmcalls[] = { 1332 (sy_call_t *)sys_shmat, (sy_call_t *)oshmctl, 1333 (sy_call_t *)sys_shmdt, (sy_call_t *)sys_shmget, 1334 (sy_call_t *)freebsd7_shmctl 1335 }; 1336 1337 #ifndef _SYS_SYSPROTO_H_ 1338 /* XXX actually varargs. */ 1339 struct shmsys_args { 1340 int which; 1341 int a2; 1342 int a3; 1343 int a4; 1344 }; 1345 #endif 1346 int 1347 sys_shmsys(struct thread *td, struct shmsys_args *uap) 1348 { 1349 1350 AUDIT_ARG_SVIPC_WHICH(uap->which); 1351 if (uap->which < 0 || uap->which >= nitems(shmcalls)) 1352 return (EINVAL); 1353 return ((*shmcalls[uap->which])(td, &uap->a2)); 1354 } 1355 1356 #endif /* i386 && (COMPAT_FREEBSD4 || COMPAT_43) */ 1357 1358 #ifdef COMPAT_FREEBSD32 1359 1360 int 1361 freebsd32_shmsys(struct thread *td, struct freebsd32_shmsys_args *uap) 1362 { 1363 1364 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 1365 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 1366 AUDIT_ARG_SVIPC_WHICH(uap->which); 1367 switch (uap->which) { 1368 case 0: { /* shmat */ 1369 struct shmat_args ap; 1370 1371 ap.shmid = uap->a2; 1372 ap.shmaddr = PTRIN(uap->a3); 1373 ap.shmflg = uap->a4; 1374 return (sysent[SYS_shmat].sy_call(td, &ap)); 1375 } 1376 case 2: { /* shmdt */ 1377 struct shmdt_args ap; 1378 1379 ap.shmaddr = PTRIN(uap->a2); 1380 return (sysent[SYS_shmdt].sy_call(td, &ap)); 1381 } 1382 case 3: { /* shmget */ 1383 struct shmget_args ap; 1384 1385 ap.key = uap->a2; 1386 ap.size = uap->a3; 1387 ap.shmflg = uap->a4; 1388 return (sysent[SYS_shmget].sy_call(td, &ap)); 1389 } 1390 case 4: { /* shmctl */ 1391 struct freebsd7_freebsd32_shmctl_args ap; 1392 1393 ap.shmid = uap->a2; 1394 ap.cmd = uap->a3; 1395 ap.buf = PTRIN(uap->a4); 1396 return (freebsd7_freebsd32_shmctl(td, &ap)); 1397 } 1398 case 1: /* oshmctl */ 1399 default: 1400 return (EINVAL); 1401 } 1402 #else 1403 return (nosys(td, NULL)); 1404 #endif 1405 } 1406 1407 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 1408 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 1409 int 1410 freebsd7_freebsd32_shmctl(struct thread *td, 1411 struct freebsd7_freebsd32_shmctl_args *uap) 1412 { 1413 int error; 1414 union { 1415 struct shmid_ds shmid_ds; 1416 struct shm_info shm_info; 1417 struct shminfo shminfo; 1418 } u; 1419 union { 1420 struct shmid_ds32_old shmid_ds32; 1421 struct shm_info32 shm_info32; 1422 struct shminfo32 shminfo32; 1423 } u32; 1424 size_t sz; 1425 1426 if (uap->cmd == IPC_SET) { 1427 if ((error = copyin(uap->buf, &u32.shmid_ds32, 1428 sizeof(u32.shmid_ds32)))) 1429 goto done; 1430 freebsd32_ipcperm_old_in(&u32.shmid_ds32.shm_perm, 1431 &u.shmid_ds.shm_perm); 1432 CP(u32.shmid_ds32, u.shmid_ds, shm_segsz); 1433 CP(u32.shmid_ds32, u.shmid_ds, shm_lpid); 1434 CP(u32.shmid_ds32, u.shmid_ds, shm_cpid); 1435 CP(u32.shmid_ds32, u.shmid_ds, shm_nattch); 1436 CP(u32.shmid_ds32, u.shmid_ds, shm_atime); 1437 CP(u32.shmid_ds32, u.shmid_ds, shm_dtime); 1438 CP(u32.shmid_ds32, u.shmid_ds, shm_ctime); 1439 } 1440 1441 error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&u, &sz); 1442 if (error) 1443 goto done; 1444 1445 /* Cases in which we need to copyout */ 1446 switch (uap->cmd) { 1447 case IPC_INFO: 1448 CP(u.shminfo, u32.shminfo32, shmmax); 1449 CP(u.shminfo, u32.shminfo32, shmmin); 1450 CP(u.shminfo, u32.shminfo32, shmmni); 1451 CP(u.shminfo, u32.shminfo32, shmseg); 1452 CP(u.shminfo, u32.shminfo32, shmall); 1453 error = copyout(&u32.shminfo32, uap->buf, 1454 sizeof(u32.shminfo32)); 1455 break; 1456 case SHM_INFO: 1457 CP(u.shm_info, u32.shm_info32, used_ids); 1458 CP(u.shm_info, u32.shm_info32, shm_rss); 1459 CP(u.shm_info, u32.shm_info32, shm_tot); 1460 CP(u.shm_info, u32.shm_info32, shm_swp); 1461 CP(u.shm_info, u32.shm_info32, swap_attempts); 1462 CP(u.shm_info, u32.shm_info32, swap_successes); 1463 error = copyout(&u32.shm_info32, uap->buf, 1464 sizeof(u32.shm_info32)); 1465 break; 1466 case SHM_STAT: 1467 case IPC_STAT: 1468 memset(&u32.shmid_ds32, 0, sizeof(u32.shmid_ds32)); 1469 freebsd32_ipcperm_old_out(&u.shmid_ds.shm_perm, 1470 &u32.shmid_ds32.shm_perm); 1471 if (u.shmid_ds.shm_segsz > INT32_MAX) 1472 u32.shmid_ds32.shm_segsz = INT32_MAX; 1473 else 1474 CP(u.shmid_ds, u32.shmid_ds32, shm_segsz); 1475 CP(u.shmid_ds, u32.shmid_ds32, shm_lpid); 1476 CP(u.shmid_ds, u32.shmid_ds32, shm_cpid); 1477 CP(u.shmid_ds, u32.shmid_ds32, shm_nattch); 1478 CP(u.shmid_ds, u32.shmid_ds32, shm_atime); 1479 CP(u.shmid_ds, u32.shmid_ds32, shm_dtime); 1480 CP(u.shmid_ds, u32.shmid_ds32, shm_ctime); 1481 u32.shmid_ds32.shm_internal = 0; 1482 error = copyout(&u32.shmid_ds32, uap->buf, 1483 sizeof(u32.shmid_ds32)); 1484 break; 1485 } 1486 1487 done: 1488 if (error) { 1489 /* Invalidate the return value */ 1490 td->td_retval[0] = -1; 1491 } 1492 return (error); 1493 } 1494 #endif 1495 1496 int 1497 freebsd32_shmctl(struct thread *td, struct freebsd32_shmctl_args *uap) 1498 { 1499 int error; 1500 union { 1501 struct shmid_ds shmid_ds; 1502 struct shm_info shm_info; 1503 struct shminfo shminfo; 1504 } u; 1505 union { 1506 struct shmid_ds32 shmid_ds32; 1507 struct shm_info32 shm_info32; 1508 struct shminfo32 shminfo32; 1509 } u32; 1510 size_t sz; 1511 1512 if (uap->cmd == IPC_SET) { 1513 if ((error = copyin(uap->buf, &u32.shmid_ds32, 1514 sizeof(u32.shmid_ds32)))) 1515 goto done; 1516 freebsd32_ipcperm_in(&u32.shmid_ds32.shm_perm, 1517 &u.shmid_ds.shm_perm); 1518 CP(u32.shmid_ds32, u.shmid_ds, shm_segsz); 1519 CP(u32.shmid_ds32, u.shmid_ds, shm_lpid); 1520 CP(u32.shmid_ds32, u.shmid_ds, shm_cpid); 1521 CP(u32.shmid_ds32, u.shmid_ds, shm_nattch); 1522 CP(u32.shmid_ds32, u.shmid_ds, shm_atime); 1523 CP(u32.shmid_ds32, u.shmid_ds, shm_dtime); 1524 CP(u32.shmid_ds32, u.shmid_ds, shm_ctime); 1525 } 1526 1527 error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&u, &sz); 1528 if (error) 1529 goto done; 1530 1531 /* Cases in which we need to copyout */ 1532 switch (uap->cmd) { 1533 case IPC_INFO: 1534 CP(u.shminfo, u32.shminfo32, shmmax); 1535 CP(u.shminfo, u32.shminfo32, shmmin); 1536 CP(u.shminfo, u32.shminfo32, shmmni); 1537 CP(u.shminfo, u32.shminfo32, shmseg); 1538 CP(u.shminfo, u32.shminfo32, shmall); 1539 error = copyout(&u32.shminfo32, uap->buf, 1540 sizeof(u32.shminfo32)); 1541 break; 1542 case SHM_INFO: 1543 CP(u.shm_info, u32.shm_info32, used_ids); 1544 CP(u.shm_info, u32.shm_info32, shm_rss); 1545 CP(u.shm_info, u32.shm_info32, shm_tot); 1546 CP(u.shm_info, u32.shm_info32, shm_swp); 1547 CP(u.shm_info, u32.shm_info32, swap_attempts); 1548 CP(u.shm_info, u32.shm_info32, swap_successes); 1549 error = copyout(&u32.shm_info32, uap->buf, 1550 sizeof(u32.shm_info32)); 1551 break; 1552 case SHM_STAT: 1553 case IPC_STAT: 1554 freebsd32_ipcperm_out(&u.shmid_ds.shm_perm, 1555 &u32.shmid_ds32.shm_perm); 1556 if (u.shmid_ds.shm_segsz > INT32_MAX) 1557 u32.shmid_ds32.shm_segsz = INT32_MAX; 1558 else 1559 CP(u.shmid_ds, u32.shmid_ds32, shm_segsz); 1560 CP(u.shmid_ds, u32.shmid_ds32, shm_lpid); 1561 CP(u.shmid_ds, u32.shmid_ds32, shm_cpid); 1562 CP(u.shmid_ds, u32.shmid_ds32, shm_nattch); 1563 CP(u.shmid_ds, u32.shmid_ds32, shm_atime); 1564 CP(u.shmid_ds, u32.shmid_ds32, shm_dtime); 1565 CP(u.shmid_ds, u32.shmid_ds32, shm_ctime); 1566 error = copyout(&u32.shmid_ds32, uap->buf, 1567 sizeof(u32.shmid_ds32)); 1568 break; 1569 } 1570 1571 done: 1572 if (error) { 1573 /* Invalidate the return value */ 1574 td->td_retval[0] = -1; 1575 } 1576 return (error); 1577 } 1578 #endif 1579 1580 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 1581 defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) 1582 1583 #ifndef CP 1584 #define CP(src, dst, fld) do { (dst).fld = (src).fld; } while (0) 1585 #endif 1586 1587 #ifndef _SYS_SYSPROTO_H_ 1588 struct freebsd7_shmctl_args { 1589 int shmid; 1590 int cmd; 1591 struct shmid_ds_old *buf; 1592 }; 1593 #endif 1594 int 1595 freebsd7_shmctl(struct thread *td, struct freebsd7_shmctl_args *uap) 1596 { 1597 int error; 1598 struct shmid_ds_old old; 1599 struct shmid_ds buf; 1600 size_t bufsz; 1601 1602 /* 1603 * The only reason IPC_INFO, SHM_INFO, SHM_STAT exists is to support 1604 * Linux binaries. If we see the call come through the FreeBSD ABI, 1605 * return an error back to the user since we do not to support this. 1606 */ 1607 if (uap->cmd == IPC_INFO || uap->cmd == SHM_INFO || 1608 uap->cmd == SHM_STAT) 1609 return (EINVAL); 1610 1611 /* IPC_SET needs to copyin the buffer before calling kern_shmctl */ 1612 if (uap->cmd == IPC_SET) { 1613 if ((error = copyin(uap->buf, &old, sizeof(old)))) 1614 goto done; 1615 ipcperm_old2new(&old.shm_perm, &buf.shm_perm); 1616 CP(old, buf, shm_segsz); 1617 CP(old, buf, shm_lpid); 1618 CP(old, buf, shm_cpid); 1619 CP(old, buf, shm_nattch); 1620 CP(old, buf, shm_atime); 1621 CP(old, buf, shm_dtime); 1622 CP(old, buf, shm_ctime); 1623 } 1624 1625 error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz); 1626 if (error) 1627 goto done; 1628 1629 /* Cases in which we need to copyout */ 1630 switch (uap->cmd) { 1631 case IPC_STAT: 1632 memset(&old, 0, sizeof(old)); 1633 ipcperm_new2old(&buf.shm_perm, &old.shm_perm); 1634 if (buf.shm_segsz > INT_MAX) 1635 old.shm_segsz = INT_MAX; 1636 else 1637 CP(buf, old, shm_segsz); 1638 CP(buf, old, shm_lpid); 1639 CP(buf, old, shm_cpid); 1640 if (buf.shm_nattch > SHRT_MAX) 1641 old.shm_nattch = SHRT_MAX; 1642 else 1643 CP(buf, old, shm_nattch); 1644 CP(buf, old, shm_atime); 1645 CP(buf, old, shm_dtime); 1646 CP(buf, old, shm_ctime); 1647 old.shm_internal = NULL; 1648 error = copyout(&old, uap->buf, sizeof(old)); 1649 break; 1650 } 1651 1652 done: 1653 if (error) { 1654 /* Invalidate the return value */ 1655 td->td_retval[0] = -1; 1656 } 1657 return (error); 1658 } 1659 1660 #endif /* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 || 1661 COMPAT_FREEBSD7 */ 1662 1663 static int 1664 sysvshm_modload(struct module *module, int cmd, void *arg) 1665 { 1666 int error = 0; 1667 1668 switch (cmd) { 1669 case MOD_LOAD: 1670 error = shminit(); 1671 if (error != 0) 1672 shmunload(); 1673 break; 1674 case MOD_UNLOAD: 1675 error = shmunload(); 1676 break; 1677 case MOD_SHUTDOWN: 1678 break; 1679 default: 1680 error = EINVAL; 1681 break; 1682 } 1683 return (error); 1684 } 1685 1686 static moduledata_t sysvshm_mod = { 1687 "sysvshm", 1688 &sysvshm_modload, 1689 NULL 1690 }; 1691 1692 DECLARE_MODULE(sysvshm, sysvshm_mod, SI_SUB_SYSV_SHM, SI_ORDER_FIRST); 1693 MODULE_VERSION(sysvshm, 1); 1694