xref: /freebsd/sys/kern/sysv_shm.c (revision e4e9813eb92cd7c4d4b819a8fbed5cbd3d92f5d8)
/*	$NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $	*/
/*-
 * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Adam Glass and Charles
 *	Hannum.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * Copyright (c) 2003-2005 McAfee, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project in part by McAfee
 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
 * program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_sysvipc.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/shm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/resourcevar.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/jail.h>
#include <sys/mac.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

#ifdef MAC_DEBUG
#define MPRINTF(a)	printf a
#else
#define MPRINTF(a)
#endif

static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments");

#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
struct oshmctl_args;
static int oshmctl(struct thread *td, struct oshmctl_args *uap);
#endif

static int shmget_allocate_segment(struct thread *td,
    struct shmget_args *uap, int mode);
static int shmget_existing(struct thread *td, struct shmget_args *uap,
    int mode, int segnum);

#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
/* XXX casting to (sy_call_t *) is bogus, as usual. */
static sy_call_t *shmcalls[] = {
	(sy_call_t *)shmat, (sy_call_t *)oshmctl,
	(sy_call_t *)shmdt, (sy_call_t *)shmget,
	(sy_call_t *)shmctl
};
#endif

#define	SHMSEG_FREE		0x0200
#define	SHMSEG_REMOVED		0x0400
#define	SHMSEG_ALLOCATED	0x0800
#define	SHMSEG_WANTED		0x1000

static int shm_last_free, shm_nused, shm_committed, shmalloced;
static struct shmid_kernel	*shmsegs;

struct shmmap_state {
	vm_offset_t va;
	int shmid;
};
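
/*
 * Each process's vmspace carries an array of shminfo.shmseg of these in
 * vm_shm, one entry per possible attachment; shmid == -1 marks a free
 * slot, as set up on first shmat() and consulted by shmdt().
 */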

static void shm_deallocate_segment(struct shmid_kernel *);
static int shm_find_segment_by_key(key_t);
static struct shmid_kernel *shm_find_segment_by_shmid(int);
static struct shmid_kernel *shm_find_segment_by_shmidx(int);
static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
static void shmrealloc(void);
static void shminit(void);
static int sysvshm_modload(struct module *, int, void *);
static int shmunload(void);
static void shmexit_myhook(struct vmspace *vm);
static void shmfork_myhook(struct proc *p1, struct proc *p2);
static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS);

/*
 * Tuneable values.
 */
#ifndef SHMMAXPGS
#define	SHMMAXPGS	8192	/* Note: sysv shared memory is swap backed. */
#endif
#ifndef SHMMAX
#define	SHMMAX	(SHMMAXPGS*PAGE_SIZE)
#endif
#ifndef SHMMIN
#define	SHMMIN	1
#endif
#ifndef SHMMNI
#define	SHMMNI	192
#endif
#ifndef SHMSEG
#define	SHMSEG	128
#endif
#ifndef SHMALL
#define	SHMALL	(SHMMAXPGS)
#endif

struct	shminfo shminfo = {
	SHMMAX,
	SHMMIN,
	SHMMNI,
	SHMSEG,
	SHMALL
};

static int shm_use_phys;
static int shm_allow_removed;

SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0,
    "Maximum shared memory segment size");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0,
    "Minimum shared memory segment size");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RDTUN, &shminfo.shmmni, 0,
    "Number of shared memory identifiers");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RDTUN, &shminfo.shmseg, 0,
    "Number of segments per process");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0,
    "Maximum number of pages available for shared memory");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW,
    &shm_use_phys, 0, "Enable/Disable locking of shared memory pages in core");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RW,
    &shm_allow_removed, 0,
    "Enable/Disable attachment to attached segments marked for removal");
SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLFLAG_RD,
    NULL, 0, sysctl_shmsegs, "",
    "Current number of shared memory segments allocated");

static int
shm_find_segment_by_key(key)
	key_t key;
{
	int i;

	for (i = 0; i < shmalloced; i++)
		if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
		    shmsegs[i].u.shm_perm.key == key)
			return (i);
	return (-1);
}
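
/*
 * A shmid handed to userland combines the slot index with a per-slot
 * sequence number (see IXSEQ_TO_IPCID()/IPCID_TO_IX() in <sys/ipc.h>):
 * the low 16 bits select the shmsegs[] slot, the upper bits carry
 * shm_perm.seq, so a stale id naming a recycled slot is rejected below
 * instead of silently matching a newer segment.
 */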

static struct shmid_kernel *
shm_find_segment_by_shmid(int shmid)
{
	int segnum;
	struct shmid_kernel *shmseg;

	segnum = IPCID_TO_IX(shmid);
	if (segnum < 0 || segnum >= shmalloced)
		return (NULL);
	shmseg = &shmsegs[segnum];
	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
	    (!shm_allow_removed &&
	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0) ||
	    shmseg->u.shm_perm.seq != IPCID_TO_SEQ(shmid))
		return (NULL);
	return (shmseg);
}

static struct shmid_kernel *
shm_find_segment_by_shmidx(int segnum)
{
	struct shmid_kernel *shmseg;

	if (segnum < 0 || segnum >= shmalloced)
		return (NULL);
	shmseg = &shmsegs[segnum];
	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
	    (!shm_allow_removed &&
	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0))
		return (NULL);
	return (shmseg);
}

static void
shm_deallocate_segment(shmseg)
	struct shmid_kernel *shmseg;
{
	size_t size;

	GIANT_REQUIRED;

	vm_object_deallocate(shmseg->u.shm_internal);
	shmseg->u.shm_internal = NULL;
	size = round_page(shmseg->u.shm_segsz);
	shm_committed -= btoc(size);
	shm_nused--;
	shmseg->u.shm_perm.mode = SHMSEG_FREE;
#ifdef MAC
	mac_cleanup_sysv_shm(shmseg);
#endif
}

static int
shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
{
	struct shmid_kernel *shmseg;
	int segnum, result;
	size_t size;

	GIANT_REQUIRED;

	segnum = IPCID_TO_IX(shmmap_s->shmid);
	shmseg = &shmsegs[segnum];
	size = round_page(shmseg->u.shm_segsz);
	result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size);
	if (result != KERN_SUCCESS)
		return (EINVAL);
	shmmap_s->shmid = -1;
	shmseg->u.shm_dtime = time_second;
	if ((--shmseg->u.shm_nattch <= 0) &&
	    (shmseg->u.shm_perm.mode & SHMSEG_REMOVED)) {
		shm_deallocate_segment(shmseg);
		shm_last_free = segnum;
	}
	return (0);
}
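
/*
 * Note the deferred-destroy protocol above: IPC_RMID only marks a
 * segment SHMSEG_REMOVED, and the backing VM object is released here,
 * once the last mapping goes away and shm_nattch drops to zero.
 */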

#ifndef _SYS_SYSPROTO_H_
struct shmdt_args {
	const void *shmaddr;
};
#endif

/*
 * MPSAFE
 */
int
shmdt(td, uap)
	struct thread *td;
	struct shmdt_args *uap;
{
	struct proc *p = td->td_proc;
	struct shmmap_state *shmmap_s;
#ifdef MAC
	struct shmid_kernel *shmsegptr;
#endif
	int i;
	int error = 0;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	mtx_lock(&Giant);
	shmmap_s = p->p_vmspace->vm_shm;
	if (shmmap_s == NULL) {
		error = EINVAL;
		goto done2;
	}
	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
		if (shmmap_s->shmid != -1 &&
		    shmmap_s->va == (vm_offset_t)uap->shmaddr) {
			break;
		}
	}
	if (i == shminfo.shmseg) {
		error = EINVAL;
		goto done2;
	}
#ifdef MAC
	shmsegptr = &shmsegs[IPCID_TO_IX(shmmap_s->shmid)];
	error = mac_check_sysv_shmdt(td->td_ucred, shmsegptr);
	if (error != 0) {
		MPRINTF(("mac_check_sysv_shmdt returned %d\n", error));
		goto done2;
	}
#endif
	error = shm_delete_mapping(p->p_vmspace, shmmap_s);
done2:
	mtx_unlock(&Giant);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct shmat_args {
	int shmid;
	const void *shmaddr;
	int shmflg;
};
#endif

/*
 * MPSAFE
 */
int
kern_shmat(td, shmid, shmaddr, shmflg)
	struct thread *td;
	int shmid;
	const void *shmaddr;
	int shmflg;
{
	struct proc *p = td->td_proc;
	int i, flags;
	struct shmid_kernel *shmseg;
	struct shmmap_state *shmmap_s = NULL;
	vm_offset_t attach_va;
	vm_prot_t prot;
	vm_size_t size;
	int rv;
	int error = 0;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	mtx_lock(&Giant);
	shmmap_s = p->p_vmspace->vm_shm;
	if (shmmap_s == NULL) {
		size = shminfo.shmseg * sizeof(struct shmmap_state);
		shmmap_s = malloc(size, M_SHM, M_WAITOK);
		for (i = 0; i < shminfo.shmseg; i++)
			shmmap_s[i].shmid = -1;
		p->p_vmspace->vm_shm = shmmap_s;
	}
	shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done2;
	}
	error = ipcperm(td, &shmseg->u.shm_perm,
	    (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
	if (error)
		goto done2;
#ifdef MAC
	error = mac_check_sysv_shmat(td->td_ucred, shmseg, shmflg);
	if (error != 0) {
		MPRINTF(("mac_check_sysv_shmat returned %d\n", error));
		goto done2;
	}
#endif
	for (i = 0; i < shminfo.shmseg; i++) {
		if (shmmap_s->shmid == -1)
			break;
		shmmap_s++;
	}
	if (i >= shminfo.shmseg) {
		error = EMFILE;
		goto done2;
	}
	size = round_page(shmseg->u.shm_segsz);
#ifdef VM_PROT_READ_IS_EXEC
	prot = VM_PROT_READ | VM_PROT_EXECUTE;
#else
	prot = VM_PROT_READ;
#endif
	if ((shmflg & SHM_RDONLY) == 0)
		prot |= VM_PROT_WRITE;
	flags = MAP_ANON | MAP_SHARED;
	if (shmaddr) {
		flags |= MAP_FIXED;
		if (shmflg & SHM_RND) {
			attach_va = (vm_offset_t)shmaddr & ~(SHMLBA-1);
		} else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0) {
			attach_va = (vm_offset_t)shmaddr;
		} else {
			error = EINVAL;
			goto done2;
		}
	} else {
		/*
		 * This is just a hint to vm_map_find() about where to
		 * put it.
		 */
		PROC_LOCK(p);
		attach_va = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
		    lim_max(p, RLIMIT_DATA));
		PROC_UNLOCK(p);
	}

	vm_object_reference(shmseg->u.shm_internal);
	rv = vm_map_find(&p->p_vmspace->vm_map, shmseg->u.shm_internal,
		0, &attach_va, size, (flags & MAP_FIXED)?0:1, prot, prot, 0);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(shmseg->u.shm_internal);
		error = ENOMEM;
		goto done2;
	}
	vm_map_inherit(&p->p_vmspace->vm_map,
		attach_va, attach_va + size, VM_INHERIT_SHARE);

	shmmap_s->va = attach_va;
	shmmap_s->shmid = shmid;
	shmseg->u.shm_lpid = p->p_pid;
	shmseg->u.shm_atime = time_second;
	shmseg->u.shm_nattch++;
	td->td_retval[0] = attach_va;
done2:
	mtx_unlock(&Giant);
	return (error);
}

int
shmat(td, uap)
	struct thread *td;
	struct shmat_args *uap;
{
	return kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg);
}
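
/*
 * The typical userland sequence these syscalls serve, as an illustrative
 * sketch only (the path passed to ftok() is hypothetical):
 *
 *	key_t key = ftok("/tmp/somefile", 1);
 *	int id = shmget(key, 4096, IPC_CREAT | 0600);
 *	char *p = shmat(id, NULL, 0);		kernel chooses the address
 *	p[0] = 1;				visible to other attachers
 *	shmdt(p);
 *	shmctl(id, IPC_RMID, NULL);		destroyed on last detach
 */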

#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
struct oshmid_ds {
	struct	ipc_perm shm_perm;	/* operation perms */
	int	shm_segsz;		/* size of segment (bytes) */
	u_short	shm_cpid;		/* pid, creator */
	u_short	shm_lpid;		/* pid, last operation */
	short	shm_nattch;		/* no. of current attaches */
	time_t	shm_atime;		/* last attach time */
	time_t	shm_dtime;		/* last detach time */
	time_t	shm_ctime;		/* last change time */
	void	*shm_handle;		/* internal handle for shm segment */
};

struct oshmctl_args {
	int shmid;
	int cmd;
	struct oshmid_ds *ubuf;
};

/*
 * MPSAFE
 */
static int
oshmctl(td, uap)
	struct thread *td;
	struct oshmctl_args *uap;
{
#ifdef COMPAT_43
	int error = 0;
	struct shmid_kernel *shmseg;
	struct oshmid_ds outbuf;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	mtx_lock(&Giant);
	shmseg = shm_find_segment_by_shmid(uap->shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done2;
	}
	switch (uap->cmd) {
	case IPC_STAT:
		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
		if (error)
			goto done2;
#ifdef MAC
		error = mac_check_sysv_shmctl(td->td_ucred, shmseg, uap->cmd);
		if (error != 0) {
			MPRINTF(("mac_check_sysv_shmctl returned %d\n",
			    error));
			goto done2;
		}
#endif
		outbuf.shm_perm = shmseg->u.shm_perm;
		outbuf.shm_segsz = shmseg->u.shm_segsz;
		outbuf.shm_cpid = shmseg->u.shm_cpid;
		outbuf.shm_lpid = shmseg->u.shm_lpid;
		outbuf.shm_nattch = shmseg->u.shm_nattch;
		outbuf.shm_atime = shmseg->u.shm_atime;
		outbuf.shm_dtime = shmseg->u.shm_dtime;
		outbuf.shm_ctime = shmseg->u.shm_ctime;
		outbuf.shm_handle = shmseg->u.shm_internal;
		error = copyout(&outbuf, uap->ubuf, sizeof(outbuf));
		if (error)
			goto done2;
		break;
	default:
		error = shmctl(td, (struct shmctl_args *)uap);
		break;
	}
done2:
	mtx_unlock(&Giant);
	return (error);
#else
	return (EINVAL);
#endif
}
#endif

#ifndef _SYS_SYSPROTO_H_
struct shmctl_args {
	int shmid;
	int cmd;
	struct shmid_ds *buf;
};
#endif

/*
 * MPSAFE
 */
int
kern_shmctl(td, shmid, cmd, buf, bufsz)
	struct thread *td;
	int shmid;
	int cmd;
	void *buf;
	size_t *bufsz;
{
	int error = 0;
	struct shmid_kernel *shmseg;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);

	mtx_lock(&Giant);
	switch (cmd) {
	case IPC_INFO:
		memcpy(buf, &shminfo, sizeof(shminfo));
		if (bufsz)
			*bufsz = sizeof(shminfo);
		td->td_retval[0] = shmalloced;
		goto done2;
	case SHM_INFO: {
		struct shm_info shm_info;
		shm_info.used_ids = shm_nused;
		shm_info.shm_rss = 0;	/*XXX where to get from ? */
		shm_info.shm_tot = 0;	/*XXX where to get from ? */
		shm_info.shm_swp = 0;	/*XXX where to get from ? */
		shm_info.swap_attempts = 0;	/*XXX where to get from ? */
		shm_info.swap_successes = 0;	/*XXX where to get from ? */
		memcpy(buf, &shm_info, sizeof(shm_info));
		if (bufsz)
			*bufsz = sizeof(shm_info);
		td->td_retval[0] = shmalloced;
		goto done2;
	}
	}
	if (cmd == SHM_STAT)
		shmseg = shm_find_segment_by_shmidx(shmid);
	else
		shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done2;
	}
#ifdef MAC
	error = mac_check_sysv_shmctl(td->td_ucred, shmseg, cmd);
	if (error != 0) {
		MPRINTF(("mac_check_sysv_shmctl returned %d\n", error));
		goto done2;
	}
#endif
	switch (cmd) {
	case SHM_STAT:
	case IPC_STAT:
		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
		if (error)
			goto done2;
		memcpy(buf, &shmseg->u, sizeof(struct shmid_ds));
		if (bufsz)
			*bufsz = sizeof(struct shmid_ds);
		if (cmd == SHM_STAT)
			td->td_retval[0] = IXSEQ_TO_IPCID(shmid, shmseg->u.shm_perm);
		break;
	case IPC_SET: {
		struct shmid_ds *shmid;

		shmid = (struct shmid_ds *)buf;
		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
		if (error)
			goto done2;
		shmseg->u.shm_perm.uid = shmid->shm_perm.uid;
		shmseg->u.shm_perm.gid = shmid->shm_perm.gid;
		shmseg->u.shm_perm.mode =
		    (shmseg->u.shm_perm.mode & ~ACCESSPERMS) |
		    (shmid->shm_perm.mode & ACCESSPERMS);
		shmseg->u.shm_ctime = time_second;
		break;
	}
	case IPC_RMID:
		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
		if (error)
			goto done2;
		shmseg->u.shm_perm.key = IPC_PRIVATE;
		shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
		if (shmseg->u.shm_nattch <= 0) {
			shm_deallocate_segment(shmseg);
			shm_last_free = IPCID_TO_IX(shmid);
		}
		break;
#if 0
	case SHM_LOCK:
	case SHM_UNLOCK:
#endif
	default:
		error = EINVAL;
		break;
	}
done2:
	mtx_unlock(&Giant);
	return (error);
}
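
/*
 * IPC_INFO, SHM_INFO and SHM_STAT are Linux-compatible extensions.
 * SHM_STAT takes a shmsegs[] index rather than a shmid, which is why
 * the lookup above switches to shm_find_segment_by_shmidx().
 */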

int
shmctl(td, uap)
	struct thread *td;
	struct shmctl_args *uap;
{
	int error = 0;
	struct shmid_ds buf;
	size_t bufsz;

	/* IPC_SET needs to copyin the buffer before calling kern_shmctl */
	if (uap->cmd == IPC_SET) {
		if ((error = copyin(uap->buf, &buf, sizeof(struct shmid_ds))))
			goto done;
	}

	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz);
	if (error)
		goto done;

	/* Cases in which we need to copyout */
	switch (uap->cmd) {
	case IPC_INFO:
	case SHM_INFO:
	case SHM_STAT:
	case IPC_STAT:
		error = copyout(&buf, uap->buf, bufsz);
		break;
	}

done:
	if (error) {
		/* Invalidate the return value */
		td->td_retval[0] = -1;
	}
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct shmget_args {
	key_t key;
	size_t size;
	int shmflg;
};
#endif

static int
shmget_existing(td, uap, mode, segnum)
	struct thread *td;
	struct shmget_args *uap;
	int mode;
	int segnum;
{
	struct shmid_kernel *shmseg;
	int error;

	shmseg = &shmsegs[segnum];
	if (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) {
		/*
		 * This segment is in the process of being allocated.  Wait
		 * until it's done, and look the key up again (in case the
		 * allocation failed or it was freed).
		 */
		shmseg->u.shm_perm.mode |= SHMSEG_WANTED;
		error = tsleep(shmseg, PLOCK | PCATCH, "shmget", 0);
		if (error)
			return (error);
		return (EAGAIN);
	}
	if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
		return (EEXIST);
#ifdef MAC
	error = mac_check_sysv_shmget(td->td_ucred, shmseg, uap->shmflg);
	if (error != 0) {
		MPRINTF(("mac_check_sysv_shmget returned %d\n", error));
		return (error);
	}
#endif
	error = ipcperm(td, &shmseg->u.shm_perm, mode);
	if (error)
		return (error);
	if (uap->size && uap->size > shmseg->u.shm_segsz)
		return (EINVAL);
	td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
	return (0);
}
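
/*
 * The EAGAIN above is internal: it tells shmget() to rescan the key
 * table after the tsleep(), since the half-created segment may have
 * been completed or torn down while we slept.
 */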

static int
shmget_allocate_segment(td, uap, mode)
	struct thread *td;
	struct shmget_args *uap;
	int mode;
{
	int i, segnum, shmid, size;
	struct ucred *cred = td->td_ucred;
	struct shmid_kernel *shmseg;
	vm_object_t shm_object;

	GIANT_REQUIRED;

	if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
		return (EINVAL);
	if (shm_nused >= shminfo.shmmni) /* Any shmids left? */
		return (ENOSPC);
	size = round_page(uap->size);
	if (shm_committed + btoc(size) > shminfo.shmall)
		return (ENOMEM);
	if (shm_last_free < 0) {
		shmrealloc();	/* Maybe expand the shmsegs[] array. */
		for (i = 0; i < shmalloced; i++)
			if (shmsegs[i].u.shm_perm.mode & SHMSEG_FREE)
				break;
		if (i == shmalloced)
			return (ENOSPC);
		segnum = i;
	} else {
		segnum = shm_last_free;
		shm_last_free = -1;
	}
	shmseg = &shmsegs[segnum];
	/*
	 * In case we sleep in malloc(), mark the segment present but deleted
	 * so that no one else tries to create the same key.
	 */
	shmseg->u.shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shmseg->u.shm_perm.key = uap->key;
	shmseg->u.shm_perm.seq = (shmseg->u.shm_perm.seq + 1) & 0x7fff;
	shmid = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);

	/*
	 * We make sure that we have allocated a pager before we need
	 * to.
	 */
	if (shm_use_phys) {
		shm_object =
		    vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
	} else {
		shm_object =
		    vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0);
	}
	VM_OBJECT_LOCK(shm_object);
	vm_object_clear_flag(shm_object, OBJ_ONEMAPPING);
	vm_object_set_flag(shm_object, OBJ_NOSPLIT);
	VM_OBJECT_UNLOCK(shm_object);

	shmseg->u.shm_internal = shm_object;
	shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = cred->cr_uid;
	shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = cred->cr_gid;
	shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) |
	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
	shmseg->u.shm_segsz = uap->size;
	shmseg->u.shm_cpid = td->td_proc->p_pid;
	shmseg->u.shm_lpid = shmseg->u.shm_nattch = 0;
	shmseg->u.shm_atime = shmseg->u.shm_dtime = 0;
#ifdef MAC
	mac_create_sysv_shm(cred, shmseg);
#endif
	shmseg->u.shm_ctime = time_second;
	shm_committed += btoc(size);
	shm_nused++;
	if (shmseg->u.shm_perm.mode & SHMSEG_WANTED) {
		/*
		 * Somebody else wanted this key while we were asleep.  Wake
		 * them up now.
		 */
		shmseg->u.shm_perm.mode &= ~SHMSEG_WANTED;
		wakeup(shmseg);
	}
	td->td_retval[0] = shmid;
	return (0);
}
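
/*
 * OBJ_ONEMAPPING is cleared and OBJ_NOSPLIT set above because the object
 * is shared among arbitrary address spaces for its whole lifetime: the
 * VM system may not apply single-mapping optimizations to it, and the
 * swap pager must not split it when parts of it are unmapped.
 */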

/*
 * MPSAFE
 */
int
shmget(td, uap)
	struct thread *td;
	struct shmget_args *uap;
{
	int segnum, mode;
	int error;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	mtx_lock(&Giant);
	mode = uap->shmflg & ACCESSPERMS;
	if (uap->key != IPC_PRIVATE) {
	again:
		segnum = shm_find_segment_by_key(uap->key);
		if (segnum >= 0) {
			error = shmget_existing(td, uap, mode, segnum);
			if (error == EAGAIN)
				goto again;
			goto done2;
		}
		if ((uap->shmflg & IPC_CREAT) == 0) {
			error = ENOENT;
			goto done2;
		}
	}
	error = shmget_allocate_segment(td, uap, mode);
done2:
	mtx_unlock(&Giant);
	return (error);
}

/*
 * MPSAFE
 */
int
shmsys(td, uap)
	struct thread *td;
	/* XXX actually varargs. */
	struct shmsys_args /* {
		int	which;
		int	a2;
		int	a3;
		int	a4;
	} */ *uap;
{
#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
	int error;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	if (uap->which < 0 ||
	    uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
		return (EINVAL);
	mtx_lock(&Giant);
	error = (*shmcalls[uap->which])(td, &uap->a2);
	mtx_unlock(&Giant);
	return (error);
#else
	return (nosys(td, NULL));
#endif
}

static void
shmfork_myhook(p1, p2)
	struct proc *p1, *p2;
{
	struct shmmap_state *shmmap_s;
	size_t size;
	int i;

	mtx_lock(&Giant);
	size = shminfo.shmseg * sizeof(struct shmmap_state);
	shmmap_s = malloc(size, M_SHM, M_WAITOK);
	bcopy(p1->p_vmspace->vm_shm, shmmap_s, size);
	p2->p_vmspace->vm_shm = shmmap_s;
	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
		if (shmmap_s->shmid != -1)
			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].u.shm_nattch++;
	mtx_unlock(&Giant);
}

static void
shmexit_myhook(struct vmspace *vm)
{
	struct shmmap_state *base, *shm;
	int i;

	if ((base = vm->vm_shm) != NULL) {
		vm->vm_shm = NULL;
		mtx_lock(&Giant);
		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
			if (shm->shmid != -1)
				shm_delete_mapping(vm, shm);
		}
		mtx_unlock(&Giant);
		free(base, M_SHM);
	}
}
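
/*
 * shmfork_myhook()/shmexit_myhook() are installed in shmfork_hook and
 * shmexit_hook by shminit() below; the generic fork and address-space
 * teardown paths call through those pointers, which keeps this module
 * optional and unloadable.
 */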

static void
shmrealloc(void)
{
	int i;
	struct shmid_kernel *newsegs;

	if (shmalloced >= shminfo.shmmni)
		return;

	newsegs = malloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
	if (newsegs == NULL)
		return;
	for (i = 0; i < shmalloced; i++)
		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
	/*
	 * Initialize the newly added tail slots in newsegs[]; the old
	 * array only has shmalloced entries.
	 */
	for (; i < shminfo.shmmni; i++) {
		newsegs[i].u.shm_perm.mode = SHMSEG_FREE;
		newsegs[i].u.shm_perm.seq = 0;
#ifdef MAC
		mac_init_sysv_shm(&newsegs[i]);
#endif
	}
	free(shmsegs, M_SHM);
	shmsegs = newsegs;
	shmalloced = shminfo.shmmni;
}

static void
shminit()
{
	int i;

	TUNABLE_ULONG_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall);
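	/*
	 * Derive shmmax (bytes) from shmall (pages).  Multiplying by the
	 * full PAGE_SIZE can overflow an unsigned long for very large
	 * tunable values, so back off the multiplier until the product
	 * stops wrapping (a wrapped product compares below shmall).
	 */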
	for (i = PAGE_SIZE; i > 0; i--) {
		shminfo.shmmax = shminfo.shmall * i;
		if (shminfo.shmmax >= shminfo.shmall)
			break;
	}
	TUNABLE_ULONG_FETCH("kern.ipc.shmmin", &shminfo.shmmin);
	TUNABLE_ULONG_FETCH("kern.ipc.shmmni", &shminfo.shmmni);
	TUNABLE_ULONG_FETCH("kern.ipc.shmseg", &shminfo.shmseg);
	TUNABLE_INT_FETCH("kern.ipc.shm_use_phys", &shm_use_phys);

	shmalloced = shminfo.shmmni;
	shmsegs = malloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK);
	if (shmsegs == NULL)
		panic("cannot allocate initial memory for sysvshm");
	for (i = 0; i < shmalloced; i++) {
		shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
		shmsegs[i].u.shm_perm.seq = 0;
#ifdef MAC
		mac_init_sysv_shm(&shmsegs[i]);
#endif
	}
	shm_last_free = 0;
	shm_nused = 0;
	shm_committed = 0;
	shmexit_hook = &shmexit_myhook;
	shmfork_hook = &shmfork_myhook;
}

static int
shmunload()
{
#ifdef MAC
	int i;
#endif

	if (shm_nused > 0)
		return (EBUSY);

#ifdef MAC
	for (i = 0; i < shmalloced; i++)
		mac_destroy_sysv_shm(&shmsegs[i]);
#endif
	free(shmsegs, M_SHM);
	shmexit_hook = NULL;
	shmfork_hook = NULL;
	return (0);
}

static int
sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
{

	return (SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0])));
}
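
/*
 * kern.ipc.shmsegs exports the raw shmsegs[] array; userland consumers
 * such as ipcs(1) read it to list the active segments.
 */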

static int
sysvshm_modload(struct module *module, int cmd, void *arg)
{
	int error = 0;

	switch (cmd) {
	case MOD_LOAD:
		shminit();
		break;
	case MOD_UNLOAD:
		error = shmunload();
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

static moduledata_t sysvshm_mod = {
	"sysvshm",
	&sysvshm_modload,
	NULL
};

SYSCALL_MODULE_HELPER(shmsys);
SYSCALL_MODULE_HELPER(shmat);
SYSCALL_MODULE_HELPER(shmctl);
SYSCALL_MODULE_HELPER(shmdt);
SYSCALL_MODULE_HELPER(shmget);

DECLARE_MODULE(sysvshm, sysvshm_mod,
	SI_SUB_SYSV_SHM, SI_ORDER_FIRST);
MODULE_VERSION(sysvshm, 1);