/*	$NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $	*/
/*-
 * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Adam Glass and Charles
 *	Hannum.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * Copyright (c) 2003-2005 McAfee, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project in part by McAfee
 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
 * program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_sysvipc.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/shm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/resourcevar.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/jail.h>
#include <sys/mac.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments");

#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
struct oshmctl_args;
static int oshmctl(struct thread *td, struct oshmctl_args *uap);
#endif

static int shmget_allocate_segment(struct thread *td,
    struct shmget_args *uap, int mode);
static int shmget_existing(struct thread *td, struct shmget_args *uap,
    int mode, int segnum);

#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
/* XXX casting to (sy_call_t *) is bogus, as usual. */
static sy_call_t *shmcalls[] = {
	(sy_call_t *)shmat, (sy_call_t *)oshmctl,
	(sy_call_t *)shmdt, (sy_call_t *)shmget,
	(sy_call_t *)shmctl
};
#endif

#define	SHMSEG_FREE		0x0200
#define	SHMSEG_REMOVED		0x0400
#define	SHMSEG_ALLOCATED	0x0800
#define	SHMSEG_WANTED		0x1000

static int shm_last_free, shm_nused, shm_committed, shmalloced;
static struct shmid_kernel	*shmsegs;

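/*
 * Per-process bookkeeping: each process that attaches a segment gets an
 * array of shminfo.shmseg of these, hung off p_vmspace->vm_shm.  A free
 * slot is marked by shmid == -1.
 */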
struct shmmap_state {
	vm_offset_t va;
	int shmid;
};

static void shm_deallocate_segment(struct shmid_kernel *);
static int shm_find_segment_by_key(key_t);
static struct shmid_kernel *shm_find_segment_by_shmid(int);
static struct shmid_kernel *shm_find_segment_by_shmidx(int);
static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
static void shmrealloc(void);
static void shminit(void);
static int sysvshm_modload(struct module *, int, void *);
static int shmunload(void);
static void shmexit_myhook(struct vmspace *vm);
static void shmfork_myhook(struct proc *p1, struct proc *p2);
static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS);

/*
 * Tuneable values.
 */
#ifndef SHMMAXPGS
#define	SHMMAXPGS	8192	/* Note: sysv shared memory is swap backed. */
#endif
#ifndef SHMMAX
#define	SHMMAX	(SHMMAXPGS*PAGE_SIZE)
#endif
#ifndef SHMMIN
#define	SHMMIN	1
#endif
#ifndef SHMMNI
#define	SHMMNI	192
#endif
#ifndef SHMSEG
#define	SHMSEG	128
#endif
#ifndef SHMALL
#define	SHMALL	(SHMMAXPGS)
#endif

struct	shminfo shminfo = {
	SHMMAX,
	SHMMIN,
	SHMMNI,
	SHMSEG,
	SHMALL
};
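
/*
 * Most of these limits may be tuned at boot; shminit() below fetches the
 * corresponding loader tunables, e.g. in /boot/loader.conf:
 *
 *	kern.ipc.shmmni=512
 *	kern.ipc.shmseg=256
 *	kern.ipc.shm_use_phys=1
 *
 * shmmax, shmmin and shmall are additionally writable at runtime via
 * sysctl(8), as the CTLFLAG_RW declarations below indicate.
 */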

static int shm_use_phys;
static int shm_allow_removed;

SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0,
    "Maximum shared memory segment size");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0,
    "Minimum shared memory segment size");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RDTUN, &shminfo.shmmni, 0,
    "Number of shared memory identifiers");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RDTUN, &shminfo.shmseg, 0,
    "Number of segments per process");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0,
    "Maximum number of pages available for shared memory");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW,
    &shm_use_phys, 0, "Enable/Disable locking of shared memory pages in core");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RW,
    &shm_allow_removed, 0,
    "Enable/Disable attachment to attached segments marked for removal");
SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLFLAG_RD,
    NULL, 0, sysctl_shmsegs, "",
    "Current number of shared memory segments allocated");

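/*
 * Segment lookup helpers.  A shmid as seen by userland packs the slot
 * index and a per-slot sequence number (see IPCID_TO_IX() and
 * IPCID_TO_SEQ()); the sequence number catches stale ids that name a
 * since-recycled slot.
 */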
static int
shm_find_segment_by_key(key_t key)
{
	int i;

	for (i = 0; i < shmalloced; i++)
		if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
		    shmsegs[i].u.shm_perm.key == key)
			return (i);
	return (-1);
}

static struct shmid_kernel *
shm_find_segment_by_shmid(int shmid)
{
	int segnum;
	struct shmid_kernel *shmseg;

	segnum = IPCID_TO_IX(shmid);
	if (segnum < 0 || segnum >= shmalloced)
		return (NULL);
	shmseg = &shmsegs[segnum];
	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
	    (!shm_allow_removed &&
	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0) ||
	    shmseg->u.shm_perm.seq != IPCID_TO_SEQ(shmid))
		return (NULL);
	return (shmseg);
}

static struct shmid_kernel *
shm_find_segment_by_shmidx(int segnum)
{
	struct shmid_kernel *shmseg;

	if (segnum < 0 || segnum >= shmalloced)
		return (NULL);
	shmseg = &shmsegs[segnum];
	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
	    (!shm_allow_removed &&
	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0))
		return (NULL);
	return (shmseg);
}

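/*
 * Release the backing VM object and return the slot to the free pool.
 * Called once the last reference is gone (nattch == 0 and the segment
 * marked SHMSEG_REMOVED), under Giant.
 */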
static void
shm_deallocate_segment(struct shmid_kernel *shmseg)
{
	size_t size;

	GIANT_REQUIRED;

	vm_object_deallocate(shmseg->u.shm_internal);
	shmseg->u.shm_internal = NULL;
	size = round_page(shmseg->u.shm_segsz);
	shm_committed -= btoc(size);
	shm_nused--;
	shmseg->u.shm_perm.mode = SHMSEG_FREE;
#ifdef MAC
	mac_cleanup_sysv_shm(shmseg);
#endif
}

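/*
 * Tear down one attachment: unmap the range from the process address
 * space, invalidate the shmmap_state slot, and free the segment itself
 * if this was the last attach and the segment is marked for removal.
 */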
static int
shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
{
	struct shmid_kernel *shmseg;
	int segnum, result;
	size_t size;

	GIANT_REQUIRED;

	segnum = IPCID_TO_IX(shmmap_s->shmid);
	shmseg = &shmsegs[segnum];
	size = round_page(shmseg->u.shm_segsz);
	result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size);
	if (result != KERN_SUCCESS)
		return (EINVAL);
	shmmap_s->shmid = -1;
	shmseg->u.shm_dtime = time_second;
	if ((--shmseg->u.shm_nattch <= 0) &&
	    (shmseg->u.shm_perm.mode & SHMSEG_REMOVED)) {
		shm_deallocate_segment(shmseg);
		shm_last_free = segnum;
	}
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct shmdt_args {
	const void *shmaddr;
};
#endif

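/*
 * shmdt(2): look up the caller's attachment whose base address matches
 * shmaddr exactly and delete that mapping.
 */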
/*
 * MPSAFE
 */
int
shmdt(struct thread *td, struct shmdt_args *uap)
{
	struct proc *p = td->td_proc;
	struct shmmap_state *shmmap_s;
#ifdef MAC
	struct shmid_kernel *shmsegptr;
#endif
	int i;
	int error = 0;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	mtx_lock(&Giant);
	shmmap_s = p->p_vmspace->vm_shm;
	if (shmmap_s == NULL) {
		error = EINVAL;
		goto done2;
	}
	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
		if (shmmap_s->shmid != -1 &&
		    shmmap_s->va == (vm_offset_t)uap->shmaddr) {
			break;
		}
	}
	if (i == shminfo.shmseg) {
		error = EINVAL;
		goto done2;
	}
#ifdef MAC
	shmsegptr = &shmsegs[IPCID_TO_IX(shmmap_s->shmid)];
	error = mac_check_sysv_shmdt(td->td_ucred, shmsegptr);
	if (error != 0)
		goto done2;
#endif
	error = shm_delete_mapping(p->p_vmspace, shmmap_s);
done2:
	mtx_unlock(&Giant);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct shmat_args {
	int shmid;
	const void *shmaddr;
	int shmflg;
};
#endif

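/*
 * Common attach path for shmat(2): validate permissions, pick (or
 * validate) an attach address, then map the segment's VM object shared
 * into the calling process and bump the attach count.
 */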
/*
 * MPSAFE
 */
int
kern_shmat(struct thread *td, int shmid, const void *shmaddr, int shmflg)
{
	struct proc *p = td->td_proc;
	int i, flags;
	struct shmid_kernel *shmseg;
	struct shmmap_state *shmmap_s = NULL;
	vm_offset_t attach_va;
	vm_prot_t prot;
	vm_size_t size;
	int rv;
	int error = 0;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	mtx_lock(&Giant);
	shmmap_s = p->p_vmspace->vm_shm;
	if (shmmap_s == NULL) {
		size = shminfo.shmseg * sizeof(struct shmmap_state);
		shmmap_s = malloc(size, M_SHM, M_WAITOK);
		for (i = 0; i < shminfo.shmseg; i++)
			shmmap_s[i].shmid = -1;
		p->p_vmspace->vm_shm = shmmap_s;
	}
	shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done2;
	}
	error = ipcperm(td, &shmseg->u.shm_perm,
	    (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
	if (error)
		goto done2;
#ifdef MAC
	error = mac_check_sysv_shmat(td->td_ucred, shmseg, shmflg);
	if (error != 0)
		goto done2;
#endif
	for (i = 0; i < shminfo.shmseg; i++) {
		if (shmmap_s->shmid == -1)
			break;
		shmmap_s++;
	}
	if (i >= shminfo.shmseg) {
		error = EMFILE;
		goto done2;
	}
	size = round_page(shmseg->u.shm_segsz);
#ifdef VM_PROT_READ_IS_EXEC
	prot = VM_PROT_READ | VM_PROT_EXECUTE;
#else
	prot = VM_PROT_READ;
#endif
	if ((shmflg & SHM_RDONLY) == 0)
		prot |= VM_PROT_WRITE;
	flags = MAP_ANON | MAP_SHARED;
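	/*
	 * A caller-supplied address is mandatory (MAP_FIXED); with SHM_RND
	 * it is rounded down to the nearest SHMLBA boundary, otherwise it
	 * must already be SHMLBA-aligned.
	 */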
	if (shmaddr) {
		flags |= MAP_FIXED;
		if (shmflg & SHM_RND) {
			attach_va = (vm_offset_t)shmaddr & ~(SHMLBA-1);
		} else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0) {
			attach_va = (vm_offset_t)shmaddr;
		} else {
			error = EINVAL;
			goto done2;
		}
	} else {
		/*
		 * This is just a hint to vm_map_find() about where to
		 * put it.
		 */
		PROC_LOCK(p);
		attach_va = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
		    lim_max(p, RLIMIT_DATA));
		PROC_UNLOCK(p);
	}

	vm_object_reference(shmseg->u.shm_internal);
	rv = vm_map_find(&p->p_vmspace->vm_map, shmseg->u.shm_internal,
	    0, &attach_va, size, (flags & MAP_FIXED) ? 0 : 1, prot, prot, 0);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(shmseg->u.shm_internal);
		error = ENOMEM;
		goto done2;
	}
	vm_map_inherit(&p->p_vmspace->vm_map,
	    attach_va, attach_va + size, VM_INHERIT_SHARE);

	shmmap_s->va = attach_va;
	shmmap_s->shmid = shmid;
	shmseg->u.shm_lpid = p->p_pid;
	shmseg->u.shm_atime = time_second;
	shmseg->u.shm_nattch++;
	td->td_retval[0] = attach_va;
done2:
	mtx_unlock(&Giant);
	return (error);
}

int
shmat(struct thread *td, struct shmat_args *uap)
{

	return (kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg));
}

#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
struct oshmid_ds {
	struct	ipc_perm shm_perm;	/* operation perms */
	int	shm_segsz;		/* size of segment (bytes) */
	u_short	shm_cpid;		/* pid, creator */
	u_short	shm_lpid;		/* pid, last operation */
	short	shm_nattch;		/* no. of current attaches */
	time_t	shm_atime;		/* last attach time */
	time_t	shm_dtime;		/* last detach time */
	time_t	shm_ctime;		/* last change time */
	void	*shm_handle;		/* internal handle for shm segment */
};

struct oshmctl_args {
	int shmid;
	int cmd;
	struct oshmid_ds *ubuf;
};

/*
 * MPSAFE
 */
static int
oshmctl(struct thread *td, struct oshmctl_args *uap)
{
#ifdef COMPAT_43
	int error = 0;
	struct shmid_kernel *shmseg;
	struct oshmid_ds outbuf;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	mtx_lock(&Giant);
	shmseg = shm_find_segment_by_shmid(uap->shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done2;
	}
	switch (uap->cmd) {
	case IPC_STAT:
		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
		if (error)
			goto done2;
#ifdef MAC
		error = mac_check_sysv_shmctl(td->td_ucred, shmseg, uap->cmd);
		if (error != 0)
			goto done2;
#endif
		outbuf.shm_perm = shmseg->u.shm_perm;
		outbuf.shm_segsz = shmseg->u.shm_segsz;
		outbuf.shm_cpid = shmseg->u.shm_cpid;
		outbuf.shm_lpid = shmseg->u.shm_lpid;
		outbuf.shm_nattch = shmseg->u.shm_nattch;
		outbuf.shm_atime = shmseg->u.shm_atime;
		outbuf.shm_dtime = shmseg->u.shm_dtime;
		outbuf.shm_ctime = shmseg->u.shm_ctime;
		outbuf.shm_handle = shmseg->u.shm_internal;
		error = copyout(&outbuf, uap->ubuf, sizeof(outbuf));
		if (error)
			goto done2;
		break;
	default:
		error = shmctl(td, (struct shmctl_args *)uap);
		break;
	}
done2:
	mtx_unlock(&Giant);
	return (error);
#else
	return (EINVAL);
#endif
}
#endif

#ifndef _SYS_SYSPROTO_H_
struct shmctl_args {
	int shmid;
	int cmd;
	struct shmid_ds *buf;
};
#endif

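/*
 * Common control path for shmctl(2): IPC_INFO/SHM_INFO report global
 * state, SHM_STAT/IPC_STAT copy out a segment's shmid_ds, IPC_SET
 * updates ownership and mode, and IPC_RMID marks the segment removed
 * (it is destroyed once the last attachment goes away).
 */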
/*
 * MPSAFE
 */
int
kern_shmctl(struct thread *td, int shmid, int cmd, void *buf, size_t *bufsz)
{
	int error = 0;
	struct shmid_kernel *shmseg;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);

	mtx_lock(&Giant);
	switch (cmd) {
	case IPC_INFO:
		memcpy(buf, &shminfo, sizeof(shminfo));
		if (bufsz)
			*bufsz = sizeof(shminfo);
		td->td_retval[0] = shmalloced;
		goto done2;
	case SHM_INFO: {
		struct shm_info shm_info;

		shm_info.used_ids = shm_nused;
		shm_info.shm_rss = 0;	/* XXX where to get from ? */
		shm_info.shm_tot = 0;	/* XXX where to get from ? */
		shm_info.shm_swp = 0;	/* XXX where to get from ? */
		shm_info.swap_attempts = 0;	/* XXX where to get from ? */
		shm_info.swap_successes = 0;	/* XXX where to get from ? */
		memcpy(buf, &shm_info, sizeof(shm_info));
		if (bufsz)
			*bufsz = sizeof(shm_info);
		td->td_retval[0] = shmalloced;
		goto done2;
	}
	}
	if (cmd == SHM_STAT)
		shmseg = shm_find_segment_by_shmidx(shmid);
	else
		shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done2;
	}
#ifdef MAC
	error = mac_check_sysv_shmctl(td->td_ucred, shmseg, cmd);
	if (error != 0)
		goto done2;
#endif
	switch (cmd) {
	case SHM_STAT:
	case IPC_STAT:
		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
		if (error)
			goto done2;
		memcpy(buf, &shmseg->u, sizeof(struct shmid_ds));
		if (bufsz)
			*bufsz = sizeof(struct shmid_ds);
		if (cmd == SHM_STAT)
			td->td_retval[0] = IXSEQ_TO_IPCID(shmid, shmseg->u.shm_perm);
		break;
	case IPC_SET: {
		/* Renamed from "shmid" to avoid shadowing the parameter. */
		struct shmid_ds *shmidp;

		shmidp = (struct shmid_ds *)buf;
		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
		if (error)
			goto done2;
		shmseg->u.shm_perm.uid = shmidp->shm_perm.uid;
		shmseg->u.shm_perm.gid = shmidp->shm_perm.gid;
		shmseg->u.shm_perm.mode =
		    (shmseg->u.shm_perm.mode & ~ACCESSPERMS) |
		    (shmidp->shm_perm.mode & ACCESSPERMS);
		shmseg->u.shm_ctime = time_second;
		break;
	}
	case IPC_RMID:
		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
		if (error)
			goto done2;
		shmseg->u.shm_perm.key = IPC_PRIVATE;
		shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
		if (shmseg->u.shm_nattch <= 0) {
			shm_deallocate_segment(shmseg);
			shm_last_free = IPCID_TO_IX(shmid);
		}
		break;
#if 0
	case SHM_LOCK:
	case SHM_UNLOCK:
#endif
	default:
		error = EINVAL;
		break;
	}
done2:
	mtx_unlock(&Giant);
	return (error);
}

int
shmctl(struct thread *td, struct shmctl_args *uap)
{
	int error = 0;
	struct shmid_ds buf;
	size_t bufsz;

	/* IPC_SET needs to copyin the buffer before calling kern_shmctl. */
	if (uap->cmd == IPC_SET) {
		if ((error = copyin(uap->buf, &buf, sizeof(struct shmid_ds))))
			goto done;
	}

	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz);
	if (error)
		goto done;

	/* Cases in which we need to copyout. */
	switch (uap->cmd) {
	case IPC_INFO:
	case SHM_INFO:
	case SHM_STAT:
	case IPC_STAT:
		error = copyout(&buf, uap->buf, bufsz);
		break;
	}

done:
	if (error) {
		/* Invalidate the return value. */
		td->td_retval[0] = -1;
	}
	return (error);
}

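/*
 * Userland usage sketch (not part of this file): a minimal, hedged
 * illustration of the syscall sequence these handlers implement.
 * Error checking omitted for brevity.
 *
 *	#include <sys/types.h>
 *	#include <sys/ipc.h>
 *	#include <sys/shm.h>
 *
 *	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *	char *p = shmat(id, NULL, 0);	// kernel-chosen attach address
 *	p[0] = 1;			// visible to any other attacher
 *	shmdt(p);			// detach -> shm_delete_mapping()
 *	shmctl(id, IPC_RMID, NULL);	// destroyed at last detach
 */
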
#ifndef _SYS_SYSPROTO_H_
struct shmget_args {
	key_t key;
	size_t size;
	int shmflg;
};
#endif

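/*
 * A key matched an existing slot.  If the slot is still being set up by
 * another thread (SHMSEG_REMOVED while allocating), sleep and retry;
 * otherwise enforce IPC_CREAT|IPC_EXCL, MAC and mode checks, and the
 * requested size against the existing segment.
 */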
static int
shmget_existing(struct thread *td, struct shmget_args *uap, int mode,
    int segnum)
{
	struct shmid_kernel *shmseg;
	int error;

	shmseg = &shmsegs[segnum];
	if (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) {
		/*
		 * This segment is in the process of being allocated.  Wait
		 * until it's done, and look the key up again (in case the
		 * allocation failed or it was freed).
		 */
		shmseg->u.shm_perm.mode |= SHMSEG_WANTED;
		error = tsleep(shmseg, PLOCK | PCATCH, "shmget", 0);
		if (error)
			return (error);
		return (EAGAIN);
	}
	if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
		return (EEXIST);
#ifdef MAC
	error = mac_check_sysv_shmget(td->td_ucred, shmseg, uap->shmflg);
	if (error != 0)
		return (error);
#endif
	error = ipcperm(td, &shmseg->u.shm_perm, mode);
	if (error)
		return (error);
	if (uap->size && uap->size > shmseg->u.shm_segsz)
		return (EINVAL);
	td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
	return (0);
}

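/*
 * Create a new segment: find a free slot (growing shmsegs[] if needed),
 * check the size and accounting limits, allocate a phys- or swap-backed
 * VM object, and initialize the shmid_ds.
 */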
static int
shmget_allocate_segment(struct thread *td, struct shmget_args *uap, int mode)
{
	int i, segnum, shmid, size;
	struct ucred *cred = td->td_ucred;
	struct shmid_kernel *shmseg;
	vm_object_t shm_object;

	GIANT_REQUIRED;

	if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
		return (EINVAL);
	if (shm_nused >= shminfo.shmmni) /* Any shmids left? */
		return (ENOSPC);
	size = round_page(uap->size);
	if (shm_committed + btoc(size) > shminfo.shmall)
		return (ENOMEM);
	if (shm_last_free < 0) {
		shmrealloc();	/* Maybe expand the shmsegs[] array. */
		for (i = 0; i < shmalloced; i++)
			if (shmsegs[i].u.shm_perm.mode & SHMSEG_FREE)
				break;
		if (i == shmalloced)
			return (ENOSPC);
		segnum = i;
	} else {
		segnum = shm_last_free;
		shm_last_free = -1;
	}
	shmseg = &shmsegs[segnum];
	/*
	 * In case we sleep in malloc(), mark the segment present but deleted
	 * so that no one else tries to create the same key.
	 */
	shmseg->u.shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shmseg->u.shm_perm.key = uap->key;
	shmseg->u.shm_perm.seq = (shmseg->u.shm_perm.seq + 1) & 0x7fff;
	shmid = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);

	/*
	 * We make sure that we have allocated a pager before we need
	 * to.
	 */
	if (shm_use_phys) {
		shm_object =
		    vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
	} else {
		shm_object =
		    vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0);
	}
	VM_OBJECT_LOCK(shm_object);
	vm_object_clear_flag(shm_object, OBJ_ONEMAPPING);
	vm_object_set_flag(shm_object, OBJ_NOSPLIT);
	VM_OBJECT_UNLOCK(shm_object);

	shmseg->u.shm_internal = shm_object;
	shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = cred->cr_uid;
	shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = cred->cr_gid;
	shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) |
	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
	shmseg->u.shm_segsz = uap->size;
	shmseg->u.shm_cpid = td->td_proc->p_pid;
	shmseg->u.shm_lpid = shmseg->u.shm_nattch = 0;
	shmseg->u.shm_atime = shmseg->u.shm_dtime = 0;
#ifdef MAC
	mac_create_sysv_shm(cred, shmseg);
#endif
	shmseg->u.shm_ctime = time_second;
	shm_committed += btoc(size);
	shm_nused++;
	if (shmseg->u.shm_perm.mode & SHMSEG_WANTED) {
		/*
		 * Somebody else wanted this key while we were asleep.  Wake
		 * them up now.
		 */
		shmseg->u.shm_perm.mode &= ~SHMSEG_WANTED;
		wakeup(shmseg);
	}
	td->td_retval[0] = shmid;
	return (0);
}

/*
 * MPSAFE
 */
int
shmget(struct thread *td, struct shmget_args *uap)
{
	int segnum, mode;
	int error;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	mtx_lock(&Giant);
	mode = uap->shmflg & ACCESSPERMS;
	if (uap->key != IPC_PRIVATE) {
	again:
		segnum = shm_find_segment_by_key(uap->key);
		if (segnum >= 0) {
			error = shmget_existing(td, uap, mode, segnum);
			if (error == EAGAIN)
				goto again;
			goto done2;
		}
		if ((uap->shmflg & IPC_CREAT) == 0) {
			error = ENOENT;
			goto done2;
		}
	}
	error = shmget_allocate_segment(td, uap, mode);
done2:
	mtx_unlock(&Giant);
	return (error);
}

/*
 * MPSAFE
 */
/* XXX actually varargs. */
int
shmsys(struct thread *td, struct shmsys_args /* {
		int	which;
		int	a2;
		int	a3;
		int	a4;
	} */ *uap)
{
#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
	int error;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	if (uap->which < 0 ||
	    uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
		return (EINVAL);
	mtx_lock(&Giant);
	error = (*shmcalls[uap->which])(td, &uap->a2);
	mtx_unlock(&Giant);
	return (error);
#else
	return (nosys(td, NULL));
#endif
}

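/*
 * Fork hook: give the child a copy of the parent's attach table and
 * account one more attach for every segment mapped there.
 */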
static void
shmfork_myhook(struct proc *p1, struct proc *p2)
{
	struct shmmap_state *shmmap_s;
	size_t size;
	int i;

	mtx_lock(&Giant);
	size = shminfo.shmseg * sizeof(struct shmmap_state);
	shmmap_s = malloc(size, M_SHM, M_WAITOK);
	bcopy(p1->p_vmspace->vm_shm, shmmap_s, size);
	p2->p_vmspace->vm_shm = shmmap_s;
	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
		if (shmmap_s->shmid != -1)
			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].u.shm_nattch++;
	mtx_unlock(&Giant);
}

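/*
 * Exit hook: detach everything the dying vmspace still has attached and
 * free its attach table.
 */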
static void
shmexit_myhook(struct vmspace *vm)
{
	struct shmmap_state *base, *shm;
	int i;

	if ((base = vm->vm_shm) != NULL) {
		vm->vm_shm = NULL;
		mtx_lock(&Giant);
		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
			if (shm->shmid != -1)
				shm_delete_mapping(vm, shm);
		}
		mtx_unlock(&Giant);
		free(base, M_SHM);
	}
}

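/*
 * Grow shmsegs[] up to shminfo.shmmni slots, copying the live entries
 * and marking the new tail slots free.
 */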
static void
shmrealloc(void)
{
	int i;
	struct shmid_kernel *newsegs;

	if (shmalloced >= shminfo.shmmni)
		return;

	newsegs = malloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
	if (newsegs == NULL)
		return;
	for (i = 0; i < shmalloced; i++)
		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
	/*
	 * Initialize the new tail slots in newsegs[]; indexing the old
	 * shmsegs[] here would write past the end of that array.
	 */
	for (; i < shminfo.shmmni; i++) {
		newsegs[i].u.shm_perm.mode = SHMSEG_FREE;
		newsegs[i].u.shm_perm.seq = 0;
#ifdef MAC
		mac_init_sysv_shm(&newsegs[i]);
#endif
	}
	free(shmsegs, M_SHM);
	shmsegs = newsegs;
	shmalloced = shminfo.shmmni;
}

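/*
 * Initialization: pull in the loader tunables, derive shmmax from
 * shmall, and allocate the initial segment array.  shmmax is intended
 * to be shmall * PAGE_SIZE; the decreasing loop backs off the
 * multiplier if the u_long product would wrap (detected as the product
 * falling below shmall).
 */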
static void
shminit(void)
{
	int i;

	TUNABLE_ULONG_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall);
	for (i = PAGE_SIZE; i > 0; i--) {
		shminfo.shmmax = shminfo.shmall * i;
		if (shminfo.shmmax >= shminfo.shmall)
			break;
	}
	TUNABLE_ULONG_FETCH("kern.ipc.shmmin", &shminfo.shmmin);
	TUNABLE_ULONG_FETCH("kern.ipc.shmmni", &shminfo.shmmni);
	TUNABLE_ULONG_FETCH("kern.ipc.shmseg", &shminfo.shmseg);
	TUNABLE_INT_FETCH("kern.ipc.shm_use_phys", &shm_use_phys);

	shmalloced = shminfo.shmmni;
	shmsegs = malloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK);
	if (shmsegs == NULL)
		panic("cannot allocate initial memory for sysvshm");
	for (i = 0; i < shmalloced; i++) {
		shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
		shmsegs[i].u.shm_perm.seq = 0;
#ifdef MAC
		mac_init_sysv_shm(&shmsegs[i]);
#endif
	}
	shm_last_free = 0;
	shm_nused = 0;
	shm_committed = 0;
	shmexit_hook = &shmexit_myhook;
	shmfork_hook = &shmfork_myhook;
}

static int
shmunload(void)
{
#ifdef MAC
	int i;
#endif

	if (shm_nused > 0)
		return (EBUSY);

#ifdef MAC
	for (i = 0; i < shmalloced; i++)
		mac_destroy_sysv_shm(&shmsegs[i]);
#endif
	free(shmsegs, M_SHM);
	shmexit_hook = NULL;
	shmfork_hook = NULL;
	return (0);
}

static int
sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
{

	return (SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0])));
}

static int
sysvshm_modload(struct module *module, int cmd, void *arg)
{
	int error = 0;

	switch (cmd) {
	case MOD_LOAD:
		shminit();
		break;
	case MOD_UNLOAD:
		error = shmunload();
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

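/*
 * Module glue: register the five syscalls and run shminit() at
 * SI_SUB_SYSV_SHM during boot (or at load time for the module).
 */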
static moduledata_t sysvshm_mod = {
	"sysvshm",
	&sysvshm_modload,
	NULL
};

SYSCALL_MODULE_HELPER(shmsys);
SYSCALL_MODULE_HELPER(shmat);
SYSCALL_MODULE_HELPER(shmctl);
SYSCALL_MODULE_HELPER(shmdt);
SYSCALL_MODULE_HELPER(shmget);

DECLARE_MODULE(sysvshm, sysvshm_mod,
    SI_SUB_SYSV_SHM, SI_ORDER_FIRST);
MODULE_VERSION(sysvshm, 1);