xref: /freebsd/sys/kern/sysv_shm.c (revision d37ea99837e6ad50837fd9fe1771ddf1c3ba6002)
1 /*	$NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $	*/
2 /*
3  * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by Adam Glass and Charles
16  *	Hannum.
17  * 4. The names of the authors may not be used to endorse or promote products
18  *    derived from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_compat.h"
36 #include "opt_sysvipc.h"
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/sysctl.h>
43 #include <sys/shm.h>
44 #include <sys/proc.h>
45 #include <sys/malloc.h>
46 #include <sys/mman.h>
47 #include <sys/module.h>
48 #include <sys/mutex.h>
49 #include <sys/resourcevar.h>
50 #include <sys/stat.h>
51 #include <sys/syscall.h>
52 #include <sys/syscallsubr.h>
53 #include <sys/sysent.h>
54 #include <sys/sysproto.h>
55 #include <sys/jail.h>
56 
57 #include <vm/vm.h>
58 #include <vm/vm_param.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_object.h>
61 #include <vm/vm_map.h>
62 #include <vm/vm_page.h>
63 #include <vm/vm_pager.h>
64 
static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments");

struct oshmctl_args;
static int oshmctl(struct thread *td, struct oshmctl_args *uap);

static int shmget_allocate_segment(struct thread *td,
    struct shmget_args *uap, int mode);
static int shmget_existing(struct thread *td, struct shmget_args *uap,
    int mode, int segnum);

/* Dispatch table for the historical shmsys(2) multiplexer, indexed by 'which'. */
/* XXX casting to (sy_call_t *) is bogus, as usual. */
static sy_call_t *shmcalls[] = {
	(sy_call_t *)shmat, (sy_call_t *)oshmctl,
	(sy_call_t *)shmdt, (sy_call_t *)shmget,
	(sy_call_t *)shmctl
};

/* Segment state flags, kept in shm_perm.mode above the access bits. */
#define	SHMSEG_FREE     	0x0200	/* slot unused */
#define	SHMSEG_REMOVED  	0x0400	/* IPC_RMID'ed, or allocation in progress */
#define	SHMSEG_ALLOCATED	0x0800	/* slot holds a live segment */
#define	SHMSEG_WANTED		0x1000	/* a sleeper is waiting on this key */

/*
 * Global accounting: index of the most recently freed slot (-1 if none
 * cached), number of live segments, pages committed, and the current
 * size of the shmsegs[] array.
 */
static int shm_last_free, shm_nused, shm_committed, shmalloced;
static struct shmid_ds	*shmsegs;	/* array of shmalloced entries */

/* Per-segment backing store: the VM object holding the segment's pages. */
struct shm_handle {
	/* vm_offset_t kva; */
	vm_object_t shm_object;
};

/* One per-process attach slot: where (and which) segment is mapped. */
struct shmmap_state {
	vm_offset_t va;
	int shmid;	/* -1 when the slot is free */
};

static void shm_deallocate_segment(struct shmid_ds *);
static int shm_find_segment_by_key(key_t);
static struct shmid_ds *shm_find_segment_by_shmid(int);
static struct shmid_ds *shm_find_segment_by_shmidx(int);
static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
static void shmrealloc(void);
static void shminit(void);
static int sysvshm_modload(struct module *, int, void *);
static int shmunload(void);
static void shmexit_myhook(struct vmspace *vm);
static void shmfork_myhook(struct proc *p1, struct proc *p2);
static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS);
112 
/*
 * Tuneable values.
 */
#ifndef SHMMAXPGS
#define	SHMMAXPGS	8192	/* Note: sysv shared memory is swap backed. */
#endif
#ifndef SHMMAX
#define	SHMMAX	(SHMMAXPGS*PAGE_SIZE)	/* max segment size (bytes) */
#endif
#ifndef SHMMIN
#define	SHMMIN	1	/* min segment size (bytes) */
#endif
#ifndef SHMMNI
#define	SHMMNI	192	/* max segments system-wide */
#endif
#ifndef SHMSEG
#define	SHMSEG	128	/* max attaches per process */
#endif
#ifndef SHMALL
#define	SHMALL	(SHMMAXPGS)	/* max pages of shm system-wide */
#endif

struct	shminfo shminfo = {
	SHMMAX,
	SHMMIN,
	SHMMNI,
	SHMSEG,
	SHMALL
};

static int shm_use_phys;	/* back segments with OBJT_PHYS objects */
static int shm_allow_removed;	/* allow lookup of IPC_RMID'ed segments */

SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RDTUN, &shminfo.shmmni, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RDTUN, &shminfo.shmseg, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW,
    &shm_use_phys, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RW,
    &shm_allow_removed, 0, "");
SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLFLAG_RD,
    NULL, 0, sysctl_shmsegs, "", "");
158 
159 static int
160 shm_find_segment_by_key(key)
161 	key_t key;
162 {
163 	int i;
164 
165 	for (i = 0; i < shmalloced; i++)
166 		if ((shmsegs[i].shm_perm.mode & SHMSEG_ALLOCATED) &&
167 		    shmsegs[i].shm_perm.key == key)
168 			return (i);
169 	return (-1);
170 }
171 
172 static struct shmid_ds *
173 shm_find_segment_by_shmid(int shmid)
174 {
175 	int segnum;
176 	struct shmid_ds *shmseg;
177 
178 	segnum = IPCID_TO_IX(shmid);
179 	if (segnum < 0 || segnum >= shmalloced)
180 		return (NULL);
181 	shmseg = &shmsegs[segnum];
182 	if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
183 	    (!shm_allow_removed &&
184 	     (shmseg->shm_perm.mode & SHMSEG_REMOVED) != 0) ||
185 	    shmseg->shm_perm.seq != IPCID_TO_SEQ(shmid))
186 		return (NULL);
187 	return (shmseg);
188 }
189 
190 static struct shmid_ds *
191 shm_find_segment_by_shmidx(int segnum)
192 {
193 	struct shmid_ds *shmseg;
194 
195 	if (segnum < 0 || segnum >= shmalloced)
196 		return (NULL);
197 	shmseg = &shmsegs[segnum];
198 	if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
199 	    (!shm_allow_removed &&
200 	     (shmseg->shm_perm.mode & SHMSEG_REMOVED) != 0))
201 		return (NULL);
202 	return (shmseg);
203 }
204 
205 static void
206 shm_deallocate_segment(shmseg)
207 	struct shmid_ds *shmseg;
208 {
209 	struct shm_handle *shm_handle;
210 	size_t size;
211 
212 	GIANT_REQUIRED;
213 
214 	shm_handle = shmseg->shm_internal;
215 	vm_object_deallocate(shm_handle->shm_object);
216 	free(shm_handle, M_SHM);
217 	shmseg->shm_internal = NULL;
218 	size = round_page(shmseg->shm_segsz);
219 	shm_committed -= btoc(size);
220 	shm_nused--;
221 	shmseg->shm_perm.mode = SHMSEG_FREE;
222 }
223 
/*
 * Unmap one attachment from an address space and release the per-process
 * slot; if this was the last attachment of a segment already marked
 * SHMSEG_REMOVED, tear the segment down too.  Caller holds Giant.
 */
static int
shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
{
	struct shmid_ds *shmseg;
	int segnum, result;
	size_t size;

	GIANT_REQUIRED;

	segnum = IPCID_TO_IX(shmmap_s->shmid);
	shmseg = &shmsegs[segnum];
	size = round_page(shmseg->shm_segsz);
	result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size);
	if (result != KERN_SUCCESS)
		return (EINVAL);
	/* Free the per-process slot and stamp the detach time. */
	shmmap_s->shmid = -1;
	shmseg->shm_dtime = time_second;
	/* Last detach of a removed segment reclaims it entirely. */
	if ((--shmseg->shm_nattch <= 0) &&
	    (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
		shm_deallocate_segment(shmseg);
		shm_last_free = segnum;
	}
	return (0);
}
248 
#ifndef _SYS_SYSPROTO_H_
struct shmdt_args {
	const void *shmaddr;	/* attach address previously returned by shmat() */
};
#endif

/*
 * MPSAFE
 *
 * shmdt(2): detach the segment attached at uap->shmaddr from the
 * calling process.  EINVAL if no attachment matches that address.
 */
int
shmdt(td, uap)
	struct thread *td;
	struct shmdt_args *uap;
{
	struct proc *p = td->td_proc;
	struct shmmap_state *shmmap_s;
	int i;
	int error = 0;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	mtx_lock(&Giant);
	shmmap_s = p->p_vmspace->vm_shm;
 	if (shmmap_s == NULL) {
		error = EINVAL;
		goto done2;
	}
	/* Find the occupied slot whose attach address matches. */
	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
		if (shmmap_s->shmid != -1 &&
		    shmmap_s->va == (vm_offset_t)uap->shmaddr) {
			break;
		}
	}
	if (i == shminfo.shmseg) {
		error = EINVAL;
		goto done2;
	}
	error = shm_delete_mapping(p->p_vmspace, shmmap_s);
done2:
	mtx_unlock(&Giant);
	return (error);
}
291 
#ifndef _SYS_SYSPROTO_H_
struct shmat_args {
	int shmid;		/* segment id from shmget(2) */
	const void *shmaddr;	/* requested attach address, or NULL for any */
	int shmflg;		/* SHM_RDONLY, SHM_RND */
};
#endif

/*
 * MPSAFE
 *
 * Common code for shmat(2): map the segment identified by shmid into
 * the caller's address space at shmaddr (kernel-chosen when NULL).
 * On success the attach address is returned via td->td_retval[0].
 */
int
kern_shmat(td, shmid, shmaddr, shmflg)
	struct thread *td;
	int shmid;
	const void *shmaddr;
	int shmflg;
{
	struct proc *p = td->td_proc;
	int i, flags;
	struct shmid_ds *shmseg;
	struct shmmap_state *shmmap_s = NULL;
	struct shm_handle *shm_handle;
	vm_offset_t attach_va;
	vm_prot_t prot;
	vm_size_t size;
	int rv;
	int error = 0;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	mtx_lock(&Giant);
	/* Lazily allocate this process's attach-slot table. */
	shmmap_s = p->p_vmspace->vm_shm;
	if (shmmap_s == NULL) {
		size = shminfo.shmseg * sizeof(struct shmmap_state);
		shmmap_s = malloc(size, M_SHM, M_WAITOK);
		for (i = 0; i < shminfo.shmseg; i++)
			shmmap_s[i].shmid = -1;
		p->p_vmspace->vm_shm = shmmap_s;
	}
	shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done2;
	}
	/* Read access always required; write too unless SHM_RDONLY. */
	error = ipcperm(td, &shmseg->shm_perm,
	    (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
	if (error)
		goto done2;
	/* Find a free per-process attach slot. */
	for (i = 0; i < shminfo.shmseg; i++) {
		if (shmmap_s->shmid == -1)
			break;
		shmmap_s++;
	}
	if (i >= shminfo.shmseg) {
		error = EMFILE;
		goto done2;
	}
	size = round_page(shmseg->shm_segsz);
#ifdef VM_PROT_READ_IS_EXEC
	prot = VM_PROT_READ | VM_PROT_EXECUTE;
#else
	prot = VM_PROT_READ;
#endif
	if ((shmflg & SHM_RDONLY) == 0)
		prot |= VM_PROT_WRITE;
	flags = MAP_ANON | MAP_SHARED;
	if (shmaddr) {
		flags |= MAP_FIXED;
		if (shmflg & SHM_RND) {
			/* SHM_RND: round the address down to SHMLBA. */
			attach_va = (vm_offset_t)shmaddr & ~(SHMLBA-1);
		} else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0) {
			attach_va = (vm_offset_t)shmaddr;
		} else {
			/* Unaligned explicit address without SHM_RND. */
			error = EINVAL;
			goto done2;
		}
	} else {
		/*
		 * This is just a hint to vm_map_find() about where to
		 * put it.
		 */
		PROC_LOCK(p);
		attach_va = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
		    lim_max(p, RLIMIT_DATA));
		PROC_UNLOCK(p);
	}

	/* Take a reference for the new mapping; dropped on failure. */
	shm_handle = shmseg->shm_internal;
	vm_object_reference(shm_handle->shm_object);
	rv = vm_map_find(&p->p_vmspace->vm_map, shm_handle->shm_object,
		0, &attach_va, size, (flags & MAP_FIXED)?0:1, prot, prot, 0);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(shm_handle->shm_object);
		error = ENOMEM;
		goto done2;
	}
	/* The mapping stays shared with children across fork(). */
	vm_map_inherit(&p->p_vmspace->vm_map,
		attach_va, attach_va + size, VM_INHERIT_SHARE);

	/* Record the attachment and update segment statistics. */
	shmmap_s->va = attach_va;
	shmmap_s->shmid = shmid;
	shmseg->shm_lpid = p->p_pid;
	shmseg->shm_atime = time_second;
	shmseg->shm_nattch++;
	td->td_retval[0] = attach_va;
done2:
	mtx_unlock(&Giant);
	return (error);
}
402 
403 int
404 shmat(td, uap)
405 	struct thread *td;
406 	struct shmat_args *uap;
407 {
408 	return kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg);
409 }
410 
/* Old (4.3BSD-era) shmid_ds layout used by COMPAT_43 binaries. */
struct oshmid_ds {
	struct	ipc_perm shm_perm;	/* operation perms */
	int	shm_segsz;		/* size of segment (bytes) */
	u_short	shm_cpid;		/* pid, creator */
	u_short	shm_lpid;		/* pid, last operation */
	short	shm_nattch;		/* no. of current attaches */
	time_t	shm_atime;		/* last attach time */
	time_t	shm_dtime;		/* last detach time */
	time_t	shm_ctime;		/* last change time */
	void	*shm_handle;		/* internal handle for shm segment */
};

struct oshmctl_args {
	int shmid;
	int cmd;
	struct oshmid_ds *ubuf;
};

/*
 * MPSAFE
 *
 * Old-style shmctl(2) for COMPAT_43: IPC_STAT copies out the legacy
 * oshmid_ds layout; all other commands are forwarded to shmctl().
 * Without COMPAT_43 it always fails with EINVAL.
 */
static int
oshmctl(td, uap)
	struct thread *td;
	struct oshmctl_args *uap;
{
#ifdef COMPAT_43
	int error = 0;
	struct shmid_ds *shmseg;
	struct oshmid_ds outbuf;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	mtx_lock(&Giant);
	shmseg = shm_find_segment_by_shmid(uap->shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done2;
	}
	switch (uap->cmd) {
	case IPC_STAT:
		error = ipcperm(td, &shmseg->shm_perm, IPC_R);
		if (error)
			goto done2;
		/* Translate the native shmid_ds into the old layout. */
		outbuf.shm_perm = shmseg->shm_perm;
		outbuf.shm_segsz = shmseg->shm_segsz;
		outbuf.shm_cpid = shmseg->shm_cpid;
		outbuf.shm_lpid = shmseg->shm_lpid;
		outbuf.shm_nattch = shmseg->shm_nattch;
		outbuf.shm_atime = shmseg->shm_atime;
		outbuf.shm_dtime = shmseg->shm_dtime;
		outbuf.shm_ctime = shmseg->shm_ctime;
		outbuf.shm_handle = shmseg->shm_internal;
		error = copyout(&outbuf, uap->ubuf, sizeof(outbuf));
		if (error)
			goto done2;
		break;
	default:
		/* XXX casting to (sy_call_t *) is bogus, as usual. */
		error = ((sy_call_t *)shmctl)(td, uap);
		break;
	}
done2:
	mtx_unlock(&Giant);
	return (error);
#else
	return (EINVAL);
#endif
}
480 
#ifndef _SYS_SYSPROTO_H_
struct shmctl_args {
	int shmid;
	int cmd;
	struct shmid_ds *buf;
};
#endif

/*
 * MPSAFE
 *
 * Common code for shmctl(2).  'buf' is a kernel buffer: the caller has
 * already copied user data in for IPC_SET and will copy results out for
 * the *_INFO/*_STAT commands, for which *bufsz (when non-NULL) is set
 * to the number of bytes produced in 'buf'.
 */
int
kern_shmctl(td, shmid, cmd, buf, bufsz)
	struct thread *td;
	int shmid;
	int cmd;
	void *buf;
	size_t *bufsz;
{
	int error = 0;
	struct shmid_ds *shmseg;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);

	mtx_lock(&Giant);
	switch (cmd) {
	case IPC_INFO:
		/* Return the shminfo limits; retval is the table size. */
		memcpy(buf, &shminfo, sizeof(shminfo));
		if (bufsz)
			*bufsz = sizeof(shminfo);
		td->td_retval[0] = shmalloced;
		goto done2;
	case SHM_INFO: {
		struct shm_info shm_info;
		shm_info.used_ids = shm_nused;
		shm_info.shm_rss = 0;	/*XXX where to get from ? */
		shm_info.shm_tot = 0;	/*XXX where to get from ? */
		shm_info.shm_swp = 0;	/*XXX where to get from ? */
		shm_info.swap_attempts = 0;	/*XXX where to get from ? */
		shm_info.swap_successes = 0;	/*XXX where to get from ? */
		memcpy(buf, &shm_info, sizeof(shm_info));
		if (bufsz)
			*bufsz = sizeof(shm_info);
		td->td_retval[0] = shmalloced;
		goto done2;
	}
	}
	/* SHM_STAT takes a raw table index; everything else an IPC id. */
	if (cmd == SHM_STAT)
		shmseg = shm_find_segment_by_shmidx(shmid);
	else
		shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done2;
	}
	switch (cmd) {
	case SHM_STAT:
	case IPC_STAT:
		error = ipcperm(td, &shmseg->shm_perm, IPC_R);
		if (error)
			goto done2;
		memcpy(buf, shmseg, sizeof(struct shmid_ds));
		if (bufsz)
			*bufsz = sizeof(struct shmid_ds);
		/* SHM_STAT also reports the segment's full IPC id. */
		if (cmd == SHM_STAT)
			td->td_retval[0] = IXSEQ_TO_IPCID(shmid, shmseg->shm_perm);
		break;
	case IPC_SET: {
		/* NOTE: this local shadows the 'shmid' parameter. */
		struct shmid_ds *shmid;

		shmid = (struct shmid_ds *)buf;
		error = ipcperm(td, &shmseg->shm_perm, IPC_M);
		if (error)
			goto done2;
		/* Only uid/gid and the access bits may be changed. */
		shmseg->shm_perm.uid = shmid->shm_perm.uid;
		shmseg->shm_perm.gid = shmid->shm_perm.gid;
		shmseg->shm_perm.mode =
		    (shmseg->shm_perm.mode & ~ACCESSPERMS) |
		    (shmid->shm_perm.mode & ACCESSPERMS);
		shmseg->shm_ctime = time_second;
		break;
	}
	case IPC_RMID:
		error = ipcperm(td, &shmseg->shm_perm, IPC_M);
		if (error)
			goto done2;
		/*
		 * Hide the key and mark the segment removed; storage is
		 * reclaimed now, or when the last attachment detaches.
		 */
		shmseg->shm_perm.key = IPC_PRIVATE;
		shmseg->shm_perm.mode |= SHMSEG_REMOVED;
		if (shmseg->shm_nattch <= 0) {
			shm_deallocate_segment(shmseg);
			shm_last_free = IPCID_TO_IX(shmid);
		}
		break;
#if 0
	case SHM_LOCK:
	case SHM_UNLOCK:
#endif
	default:
		error = EINVAL;
		break;
	}
done2:
	mtx_unlock(&Giant);
	return (error);
}
587 
588 int
589 shmctl(td, uap)
590 	struct thread *td;
591 	struct shmctl_args *uap;
592 {
593 	int error = 0;
594 	struct shmid_ds buf;
595 	size_t bufsz;
596 
597 	/* IPC_SET needs to copyin the buffer before calling kern_shmctl */
598 	if (uap->cmd == IPC_SET) {
599 		if ((error = copyin(uap->buf, &buf, sizeof(struct shmid_ds))))
600 			goto done;
601 	}
602 
603 	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz);
604 	if (error)
605 		goto done;
606 
607 	/* Cases in which we need to copyout */
608 	switch (uap->cmd) {
609 	case IPC_INFO:
610 	case SHM_INFO:
611 	case SHM_STAT:
612 	case IPC_STAT:
613 		error = copyout(&buf, uap->buf, bufsz);
614 		break;
615 	}
616 
617 done:
618 	if (error) {
619 		/* Invalidate the return value */
620 		td->td_retval[0] = -1;
621 	}
622 	return (error);
623 }
624 
625 
#ifndef _SYS_SYSPROTO_H_
struct shmget_args {
	key_t key;	/* IPC key, or IPC_PRIVATE */
	size_t size;	/* requested minimum segment size (bytes) */
	int shmflg;	/* IPC_CREAT/IPC_EXCL plus permission bits */
};
#endif
633 
634 static int
635 shmget_existing(td, uap, mode, segnum)
636 	struct thread *td;
637 	struct shmget_args *uap;
638 	int mode;
639 	int segnum;
640 {
641 	struct shmid_ds *shmseg;
642 	int error;
643 
644 	shmseg = &shmsegs[segnum];
645 	if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
646 		/*
647 		 * This segment is in the process of being allocated.  Wait
648 		 * until it's done, and look the key up again (in case the
649 		 * allocation failed or it was freed).
650 		 */
651 		shmseg->shm_perm.mode |= SHMSEG_WANTED;
652 		error = tsleep(shmseg, PLOCK | PCATCH, "shmget", 0);
653 		if (error)
654 			return (error);
655 		return (EAGAIN);
656 	}
657 	if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
658 		return (EEXIST);
659 	error = ipcperm(td, &shmseg->shm_perm, mode);
660 	if (error)
661 		return (error);
662 	if (uap->size && uap->size > shmseg->shm_segsz)
663 		return (EINVAL);
664 	td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
665 	return (0);
666 }
667 
/*
 * Create a brand-new segment for shmget(2).  Called with Giant held;
 * may sleep in malloc(), so the chosen slot is first marked
 * ALLOCATED|REMOVED to keep concurrent shmget(2) callers off the key.
 */
static int
shmget_allocate_segment(td, uap, mode)
	struct thread *td;
	struct shmget_args *uap;
	int mode;
{
	int i, segnum, shmid, size;
	struct ucred *cred = td->td_ucred;
	struct shmid_ds *shmseg;
	struct shm_handle *shm_handle;

	GIANT_REQUIRED;

	if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
		return (EINVAL);
	if (shm_nused >= shminfo.shmmni) /* Any shmids left? */
		return (ENOSPC);
	size = round_page(uap->size);
	if (shm_committed + btoc(size) > shminfo.shmall)
		return (ENOMEM);
	/* Pick a slot: the cached last-freed index, or scan for a free one. */
	if (shm_last_free < 0) {
		shmrealloc();	/* Maybe expand the shmsegs[] array. */
		for (i = 0; i < shmalloced; i++)
			if (shmsegs[i].shm_perm.mode & SHMSEG_FREE)
				break;
		if (i == shmalloced)
			return (ENOSPC);
		segnum = i;
	} else  {
		segnum = shm_last_free;
		shm_last_free = -1;
	}
	shmseg = &shmsegs[segnum];
	/*
	 * In case we sleep in malloc(), mark the segment present but deleted
	 * so that noone else tries to create the same key.
	 */
	shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shmseg->shm_perm.key = uap->key;
	shmseg->shm_perm.seq = (shmseg->shm_perm.seq + 1) & 0x7fff;
	shm_handle = (struct shm_handle *)
	    malloc(sizeof(struct shm_handle), M_SHM, M_WAITOK);
	shmid = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);

	/*
	 * We make sure that we have allocated a pager before we need
	 * to.
	 */
	if (shm_use_phys) {
		shm_handle->shm_object =
		    vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
	} else {
		shm_handle->shm_object =
		    vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0);
	}
	/* Keep the object shareable across mappings and unsplittable. */
	VM_OBJECT_LOCK(shm_handle->shm_object);
	vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
	vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);
	VM_OBJECT_UNLOCK(shm_handle->shm_object);

	/* Fill in ownership, size and timestamps; commit the pages. */
	shmseg->shm_internal = shm_handle;
	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid;
	shmseg->shm_perm.cgid = shmseg->shm_perm.gid = cred->cr_gid;
	shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
	shmseg->shm_segsz = uap->size;
	shmseg->shm_cpid = td->td_proc->p_pid;
	shmseg->shm_lpid = shmseg->shm_nattch = 0;
	shmseg->shm_atime = shmseg->shm_dtime = 0;
	shmseg->shm_ctime = time_second;
	shm_committed += btoc(size);
	shm_nused++;
	if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
		/*
		 * Somebody else wanted this key while we were asleep.  Wake
		 * them up now.
		 */
		shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
		wakeup(shmseg);
	}
	td->td_retval[0] = shmid;
	return (0);
}
751 
752 /*
753  * MPSAFE
754  */
755 int
756 shmget(td, uap)
757 	struct thread *td;
758 	struct shmget_args *uap;
759 {
760 	int segnum, mode;
761 	int error;
762 
763 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
764 		return (ENOSYS);
765 	mtx_lock(&Giant);
766 	mode = uap->shmflg & ACCESSPERMS;
767 	if (uap->key != IPC_PRIVATE) {
768 	again:
769 		segnum = shm_find_segment_by_key(uap->key);
770 		if (segnum >= 0) {
771 			error = shmget_existing(td, uap, mode, segnum);
772 			if (error == EAGAIN)
773 				goto again;
774 			goto done2;
775 		}
776 		if ((uap->shmflg & IPC_CREAT) == 0) {
777 			error = ENOENT;
778 			goto done2;
779 		}
780 	}
781 	error = shmget_allocate_segment(td, uap, mode);
782 done2:
783 	mtx_unlock(&Giant);
784 	return (error);
785 }
786 
/*
 * MPSAFE
 *
 * shmsys(2): historical multiplexer that dispatches to one of the
 * shmcalls[] handlers (shmat/oshmctl/shmdt/shmget/shmctl) selected by
 * uap->which, passing the remaining words through as that handler's uap.
 */
int
shmsys(td, uap)
	struct thread *td;
	/* XXX actually varargs. */
	struct shmsys_args /* {
		int	which;
		int	a2;
		int	a3;
		int	a4;
	} */ *uap;
{
	int error;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	if (uap->which < 0 ||
	    uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
		return (EINVAL);
	mtx_lock(&Giant);
	/* &uap->a2 reinterprets the trailing words as the target's uap. */
	error = (*shmcalls[uap->which])(td, &uap->a2);
	mtx_unlock(&Giant);
	return (error);
}
813 
814 static void
815 shmfork_myhook(p1, p2)
816 	struct proc *p1, *p2;
817 {
818 	struct shmmap_state *shmmap_s;
819 	size_t size;
820 	int i;
821 
822 	size = shminfo.shmseg * sizeof(struct shmmap_state);
823 	shmmap_s = malloc(size, M_SHM, M_WAITOK);
824 	bcopy(p1->p_vmspace->vm_shm, shmmap_s, size);
825 	p2->p_vmspace->vm_shm = shmmap_s;
826 	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
827 		if (shmmap_s->shmid != -1)
828 			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].shm_nattch++;
829 }
830 
831 static void
832 shmexit_myhook(struct vmspace *vm)
833 {
834 	struct shmmap_state *base, *shm;
835 	int i;
836 
837 	GIANT_REQUIRED;
838 
839 	if ((base = vm->vm_shm) != NULL) {
840 		vm->vm_shm = NULL;
841 		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
842 			if (shm->shmid != -1)
843 				shm_delete_mapping(vm, shm);
844 		}
845 		free(base, M_SHM);
846 	}
847 }
848 
849 static void
850 shmrealloc(void)
851 {
852 	int i;
853 	struct shmid_ds *newsegs;
854 
855 	if (shmalloced >= shminfo.shmmni)
856 		return;
857 
858 	newsegs = malloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
859 	if (newsegs == NULL)
860 		return;
861 	for (i = 0; i < shmalloced; i++)
862 		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
863 	for (; i < shminfo.shmmni; i++) {
864 		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
865 		shmsegs[i].shm_perm.seq = 0;
866 	}
867 	free(shmsegs, M_SHM);
868 	shmsegs = newsegs;
869 	shmalloced = shminfo.shmmni;
870 }
871 
872 static void
873 shminit()
874 {
875 	int i;
876 
877 	TUNABLE_INT_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall);
878 	for (i = PAGE_SIZE; i > 0; i--) {
879 		shminfo.shmmax = shminfo.shmall * PAGE_SIZE;
880 		if (shminfo.shmmax >= shminfo.shmall)
881 			break;
882 	}
883 	TUNABLE_INT_FETCH("kern.ipc.shmmin", &shminfo.shmmin);
884 	TUNABLE_INT_FETCH("kern.ipc.shmmni", &shminfo.shmmni);
885 	TUNABLE_INT_FETCH("kern.ipc.shmseg", &shminfo.shmseg);
886 	TUNABLE_INT_FETCH("kern.ipc.shm_use_phys", &shm_use_phys);
887 
888 	shmalloced = shminfo.shmmni;
889 	shmsegs = malloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK);
890 	if (shmsegs == NULL)
891 		panic("cannot allocate initial memory for sysvshm");
892 	for (i = 0; i < shmalloced; i++) {
893 		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
894 		shmsegs[i].shm_perm.seq = 0;
895 	}
896 	shm_last_free = 0;
897 	shm_nused = 0;
898 	shm_committed = 0;
899 	shmexit_hook = &shmexit_myhook;
900 	shmfork_hook = &shmfork_myhook;
901 }
902 
903 static int
904 shmunload()
905 {
906 
907 	if (shm_nused > 0)
908 		return (EBUSY);
909 
910 	free(shmsegs, M_SHM);
911 	shmexit_hook = NULL;
912 	shmfork_hook = NULL;
913 	return (0);
914 }
915 
/*
 * Handler for the kern.ipc.shmsegs sysctl: export the raw shmsegs[]
 * array to userland (consumed by tools such as ipcs(1)).
 */
static int
sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
{

	return (SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0])));
}
922 
923 static int
924 sysvshm_modload(struct module *module, int cmd, void *arg)
925 {
926 	int error = 0;
927 
928 	switch (cmd) {
929 	case MOD_LOAD:
930 		shminit();
931 		break;
932 	case MOD_UNLOAD:
933 		error = shmunload();
934 		break;
935 	case MOD_SHUTDOWN:
936 		break;
937 	default:
938 		error = EINVAL;
939 		break;
940 	}
941 	return (error);
942 }
943 
/* Module descriptor: name, event handler, no extra private data. */
static moduledata_t sysvshm_mod = {
	"sysvshm",
	&sysvshm_modload,
	NULL
};

/* Register the five System V shared memory syscalls. */
SYSCALL_MODULE_HELPER(shmsys);
SYSCALL_MODULE_HELPER(shmat);
SYSCALL_MODULE_HELPER(shmctl);
SYSCALL_MODULE_HELPER(shmdt);
SYSCALL_MODULE_HELPER(shmget);

DECLARE_MODULE(sysvshm, sysvshm_mod,
	SI_SUB_SYSV_SHM, SI_ORDER_FIRST);
MODULE_VERSION(sysvshm, 1);
959