xref: /freebsd/sys/kern/sysv_msg.c (revision 0761549550dad58bfcc0f227ba6ffa78b4d8c017)
1 /*-
2  * Implementation of SVID messages
3  *
4  * Author:  Daniel Boulet
5  *
6  * Copyright 1993 Daniel Boulet and RTMX Inc.
7  *
8  * This system call was implemented by Daniel Boulet under contract from RTMX.
9  *
10  * Redistribution and use in source forms, with and without modification,
11  * are permitted provided that this entire comment appears intact.
12  *
13  * Redistribution in binary form may occur without any restrictions.
14  * Obviously, it would be nice if you gave credit where credit is due
15  * but requiring it would be too onerous.
16  *
17  * This software is provided ``AS IS'' without any warranties of any kind.
18  */
19 /*-
20  * Copyright (c) 2003-2005 McAfee, Inc.
21  * All rights reserved.
22  *
23  * This software was developed for the FreeBSD Project in part by McAfee
24  * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
25  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
26  * program.
27  *
28  * Redistribution and use in source and binary forms, with or without
29  * modification, are permitted provided that the following conditions
30  * are met:
31  * 1. Redistributions of source code must retain the above copyright
32  *    notice, this list of conditions and the following disclaimer.
33  * 2. Redistributions in binary form must reproduce the above copyright
34  *    notice, this list of conditions and the following disclaimer in the
35  *    documentation and/or other materials provided with the distribution.
36  *
37  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
38  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
41  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  */
49 
50 #include <sys/cdefs.h>
51 __FBSDID("$FreeBSD$");
52 
53 #include "opt_compat.h"
54 #include "opt_sysvipc.h"
55 
56 #include <sys/param.h>
57 #include <sys/systm.h>
58 #include <sys/sysproto.h>
59 #include <sys/kernel.h>
60 #include <sys/priv.h>
61 #include <sys/proc.h>
62 #include <sys/lock.h>
63 #include <sys/mutex.h>
64 #include <sys/module.h>
65 #include <sys/msg.h>
66 #include <sys/racct.h>
67 #include <sys/syscall.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysent.h>
70 #include <sys/sysctl.h>
71 #include <sys/malloc.h>
72 #include <sys/jail.h>
73 
74 #include <security/mac/mac_framework.h>
75 
76 FEATURE(sysv_msg, "System V message queues support");
77 
78 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
79 
80 static int msginit(void);
81 static int msgunload(void);
82 static int sysvmsg_modload(struct module *, int, void *);
83 
84 
85 #ifdef MSG_DEBUG
86 #define DPRINTF(a)	printf a
87 #else
88 #define DPRINTF(a)	(void)0
89 #endif
90 
91 static void msg_freehdr(struct msg *msghdr);
92 
93 #ifndef MSGSSZ
94 #define MSGSSZ	8		/* Each segment must be 2^N long */
95 #endif
96 #ifndef MSGSEG
97 #define MSGSEG	2048		/* must be less than 32767 */
98 #endif
99 #define MSGMAX	(MSGSSZ*MSGSEG)
100 #ifndef MSGMNB
101 #define MSGMNB	2048		/* max # of bytes in a queue */
102 #endif
103 #ifndef MSGMNI
104 #define MSGMNI	40
105 #endif
106 #ifndef MSGTQL
107 #define MSGTQL	40
108 #endif
109 
110 /*
111  * Based on the configuration parameters described in an SVR2 (yes, two)
112  * config(1m) man page.
113  *
114  * Each message is broken up and stored in segments that are msgssz bytes
115  * long.  For efficiency reasons, this should be a power of two.  Also,
116  * it doesn't make sense if it is less than 8 or greater than about 256.
117  * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
118  * two between 8 and 1024 inclusive (and panic's if it isn't).
119  */
120 struct msginfo msginfo = {
121                 MSGMAX,         /* max chars in a message */
122                 MSGMNI,         /* # of message queue identifiers */
123                 MSGMNB,         /* max chars in a queue */
124                 MSGTQL,         /* max messages in system */
125                 MSGSSZ,         /* size of a message segment */
126                 		/* (must be small power of 2 greater than 4) */
127                 MSGSEG          /* number of message segments */
128 };
129 
130 /*
131  * macros to convert between msqid_ds's and msqid's.
132  * (specific to this implementation)
133  */
134 #define MSQID(ix,ds)	((ix) & 0xffff | (((ds).msg_perm.seq << 16) & 0xffff0000))
135 #define MSQID_IX(id)	((id) & 0xffff)
136 #define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
137 
138 /*
139  * The rest of this file is specific to this particular implementation.
140  */
141 
142 struct msgmap {
143 	short	next;		/* next segment in buffer */
144     				/* -1 -> available */
145     				/* 0..(MSGSEG-1) -> index of next segment */
146 };
147 
148 #define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
149 
150 static int nfree_msgmaps;	/* # of free map entries */
151 static short free_msgmaps;	/* head of linked list of free map entries */
152 static struct msg *free_msghdrs;/* list of free msg headers */
153 static char *msgpool;		/* MSGMAX byte long msg buffer pool */
154 static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
155 static struct msg *msghdrs;	/* MSGTQL msg headers */
156 static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
157 static struct mtx msq_mtx;	/* global mutex for message queues. */
158 
159 static struct syscall_helper_data msg_syscalls[] = {
160 	SYSCALL_INIT_HELPER(msgctl),
161 	SYSCALL_INIT_HELPER(msgget),
162 	SYSCALL_INIT_HELPER(msgsnd),
163 	SYSCALL_INIT_HELPER(msgrcv),
164 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
165     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
166 	SYSCALL_INIT_HELPER(msgsys),
167 	SYSCALL_INIT_HELPER_COMPAT(freebsd7_msgctl),
168 #endif
169 	SYSCALL_INIT_LAST
170 };
171 
172 #ifdef COMPAT_FREEBSD32
173 #include <compat/freebsd32/freebsd32.h>
174 #include <compat/freebsd32/freebsd32_ipc.h>
175 #include <compat/freebsd32/freebsd32_proto.h>
176 #include <compat/freebsd32/freebsd32_signal.h>
177 #include <compat/freebsd32/freebsd32_syscall.h>
178 #include <compat/freebsd32/freebsd32_util.h>
179 
180 static struct syscall_helper_data msg32_syscalls[] = {
181 	SYSCALL32_INIT_HELPER(freebsd32_msgctl),
182 	SYSCALL32_INIT_HELPER(freebsd32_msgsnd),
183 	SYSCALL32_INIT_HELPER(freebsd32_msgrcv),
184 	SYSCALL32_INIT_HELPER_COMPAT(msgget),
185 	SYSCALL32_INIT_HELPER(freebsd32_msgsys),
186 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
187     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
188 	SYSCALL32_INIT_HELPER(freebsd7_freebsd32_msgctl),
189 #endif
190 	SYSCALL_INIT_LAST
191 };
192 #endif
193 
194 static int
195 msginit()
196 {
197 	int i, error;
198 
199 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
200 	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
201 	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
202 	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
203 	msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG,
204 	    M_WAITOK);
205 
206 	/*
207 	 * msginfo.msgssz should be a power of two for efficiency reasons.
208 	 * It is also pretty silly if msginfo.msgssz is less than 8
209 	 * or greater than about 256 so ...
210 	 */
211 
212 	i = 8;
213 	while (i < 1024 && i != msginfo.msgssz)
214 		i <<= 1;
215     	if (i != msginfo.msgssz) {
216 		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
217 		    msginfo.msgssz));
218 		panic("msginfo.msgssz not a small power of 2");
219 	}
220 
221 	if (msginfo.msgseg > 32767) {
222 		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
223 		panic("msginfo.msgseg > 32767");
224 	}
225 
226 	for (i = 0; i < msginfo.msgseg; i++) {
227 		if (i > 0)
228 			msgmaps[i-1].next = i;
229 		msgmaps[i].next = -1;	/* implies entry is available */
230 	}
231 	free_msgmaps = 0;
232 	nfree_msgmaps = msginfo.msgseg;
233 
234 	for (i = 0; i < msginfo.msgtql; i++) {
235 		msghdrs[i].msg_type = 0;
236 		if (i > 0)
237 			msghdrs[i-1].msg_next = &msghdrs[i];
238 		msghdrs[i].msg_next = NULL;
239 #ifdef MAC
240 		mac_sysvmsg_init(&msghdrs[i]);
241 #endif
242     	}
243 	free_msghdrs = &msghdrs[0];
244 
245 	for (i = 0; i < msginfo.msgmni; i++) {
246 		msqids[i].u.msg_qbytes = 0;	/* implies entry is available */
247 		msqids[i].u.msg_perm.seq = 0;	/* reset to a known value */
248 		msqids[i].u.msg_perm.mode = 0;
249 #ifdef MAC
250 		mac_sysvmsq_init(&msqids[i]);
251 #endif
252 	}
253 	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
254 
255 	error = syscall_helper_register(msg_syscalls, SY_THR_STATIC_KLD);
256 	if (error != 0)
257 		return (error);
258 #ifdef COMPAT_FREEBSD32
259 	error = syscall32_helper_register(msg32_syscalls, SY_THR_STATIC_KLD);
260 	if (error != 0)
261 		return (error);
262 #endif
263 	return (0);
264 }
265 
266 static int
267 msgunload()
268 {
269 	struct msqid_kernel *msqkptr;
270 	int msqid;
271 #ifdef MAC
272 	int i;
273 #endif
274 
275 	syscall_helper_unregister(msg_syscalls);
276 #ifdef COMPAT_FREEBSD32
277 	syscall32_helper_unregister(msg32_syscalls);
278 #endif
279 
280 	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
281 		/*
282 		 * Look for an unallocated and unlocked msqid_ds.
283 		 * msqid_ds's can be locked by msgsnd or msgrcv while
284 		 * they are copying the message in/out.  We can't
285 		 * re-use the entry until they release it.
286 		 */
287 		msqkptr = &msqids[msqid];
288 		if (msqkptr->u.msg_qbytes != 0 ||
289 		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
290 			break;
291 	}
292 	if (msqid != msginfo.msgmni)
293 		return (EBUSY);
294 
295 #ifdef MAC
296 	for (i = 0; i < msginfo.msgtql; i++)
297 		mac_sysvmsg_destroy(&msghdrs[i]);
298 	for (msqid = 0; msqid < msginfo.msgmni; msqid++)
299 		mac_sysvmsq_destroy(&msqids[msqid]);
300 #endif
301 	free(msgpool, M_MSG);
302 	free(msgmaps, M_MSG);
303 	free(msghdrs, M_MSG);
304 	free(msqids, M_MSG);
305 	mtx_destroy(&msq_mtx);
306 	return (0);
307 }
308 
309 
310 static int
311 sysvmsg_modload(struct module *module, int cmd, void *arg)
312 {
313 	int error = 0;
314 
315 	switch (cmd) {
316 	case MOD_LOAD:
317 		error = msginit();
318 		if (error != 0)
319 			msgunload();
320 		break;
321 	case MOD_UNLOAD:
322 		error = msgunload();
323 		break;
324 	case MOD_SHUTDOWN:
325 		break;
326 	default:
327 		error = EINVAL;
328 		break;
329 	}
330 	return (error);
331 }
332 
333 static moduledata_t sysvmsg_mod = {
334 	"sysvmsg",
335 	&sysvmsg_modload,
336 	NULL
337 };
338 
339 DECLARE_MODULE(sysvmsg, sysvmsg_mod, SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
340 MODULE_VERSION(sysvmsg, 1);
341 
342 static void
343 msg_freehdr(msghdr)
344 	struct msg *msghdr;
345 {
346 	while (msghdr->msg_ts > 0) {
347 		short next;
348 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
349 			panic("msghdr->msg_spot out of range");
350 		next = msgmaps[msghdr->msg_spot].next;
351 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
352 		free_msgmaps = msghdr->msg_spot;
353 		nfree_msgmaps++;
354 		msghdr->msg_spot = next;
355 		if (msghdr->msg_ts >= msginfo.msgssz)
356 			msghdr->msg_ts -= msginfo.msgssz;
357 		else
358 			msghdr->msg_ts = 0;
359 	}
360 	if (msghdr->msg_spot != -1)
361 		panic("msghdr->msg_spot != -1");
362 	msghdr->msg_next = free_msghdrs;
363 	free_msghdrs = msghdr;
364 #ifdef MAC
365 	mac_sysvmsg_cleanup(msghdr);
366 #endif
367 }
368 
369 #ifndef _SYS_SYSPROTO_H_
370 struct msgctl_args {
371 	int	msqid;
372 	int	cmd;
373 	struct	msqid_ds *buf;
374 };
375 #endif
376 int
377 sys_msgctl(td, uap)
378 	struct thread *td;
379 	register struct msgctl_args *uap;
380 {
381 	int msqid = uap->msqid;
382 	int cmd = uap->cmd;
383 	struct msqid_ds msqbuf;
384 	int error;
385 
386 	DPRINTF(("call to msgctl(%d, %d, %p)\n", msqid, cmd, uap->buf));
387 	if (cmd == IPC_SET &&
388 	    (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0)
389 		return (error);
390 	error = kern_msgctl(td, msqid, cmd, &msqbuf);
391 	if (cmd == IPC_STAT && error == 0)
392 		error = copyout(&msqbuf, uap->buf, sizeof(struct msqid_ds));
393 	return (error);
394 }
395 
396 int
397 kern_msgctl(td, msqid, cmd, msqbuf)
398 	struct thread *td;
399 	int msqid;
400 	int cmd;
401 	struct msqid_ds *msqbuf;
402 {
403 	int rval, error, msqix;
404 	register struct msqid_kernel *msqkptr;
405 
406 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
407 		return (ENOSYS);
408 
409 	msqix = IPCID_TO_IX(msqid);
410 
411 	if (msqix < 0 || msqix >= msginfo.msgmni) {
412 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
413 		    msginfo.msgmni));
414 		return (EINVAL);
415 	}
416 
417 	msqkptr = &msqids[msqix];
418 
419 	mtx_lock(&msq_mtx);
420 	if (msqkptr->u.msg_qbytes == 0) {
421 		DPRINTF(("no such msqid\n"));
422 		error = EINVAL;
423 		goto done2;
424 	}
425 	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
426 		DPRINTF(("wrong sequence number\n"));
427 		error = EINVAL;
428 		goto done2;
429 	}
430 #ifdef MAC
431 	error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd);
432 	if (error != 0)
433 		goto done2;
434 #endif
435 
436 	error = 0;
437 	rval = 0;
438 
439 	switch (cmd) {
440 
441 	case IPC_RMID:
442 	{
443 		struct msg *msghdr;
444 		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
445 			goto done2;
446 
447 #ifdef MAC
448 		/*
449 		 * Check that the thread has MAC access permissions to
450 		 * individual msghdrs.  Note: We need to do this in a
451 		 * separate loop because the actual loop alters the
452 		 * msq/msghdr info as it progresses, and there is no going
453 		 * back if half the way through we discover that the
454 		 * thread cannot free a certain msghdr.  The msq will get
455 		 * into an inconsistent state.
456 		 */
457 		for (msghdr = msqkptr->u.msg_first; msghdr != NULL;
458 		    msghdr = msghdr->msg_next) {
459 			error = mac_sysvmsq_check_msgrmid(td->td_ucred, msghdr);
460 			if (error != 0)
461 				goto done2;
462 		}
463 #endif
464 
465 		racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
466 		racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
467 		racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
468 		crfree(msqkptr->cred);
469 		msqkptr->cred = NULL;
470 
471 		/* Free the message headers */
472 		msghdr = msqkptr->u.msg_first;
473 		while (msghdr != NULL) {
474 			struct msg *msghdr_tmp;
475 
476 			/* Free the segments of each message */
477 			msqkptr->u.msg_cbytes -= msghdr->msg_ts;
478 			msqkptr->u.msg_qnum--;
479 			msghdr_tmp = msghdr;
480 			msghdr = msghdr->msg_next;
481 			msg_freehdr(msghdr_tmp);
482 		}
483 
484 		if (msqkptr->u.msg_cbytes != 0)
485 			panic("msg_cbytes is screwed up");
486 		if (msqkptr->u.msg_qnum != 0)
487 			panic("msg_qnum is screwed up");
488 
489 		msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
490 
491 #ifdef MAC
492 		mac_sysvmsq_cleanup(msqkptr);
493 #endif
494 
495 		wakeup(msqkptr);
496 	}
497 
498 		break;
499 
500 	case IPC_SET:
501 		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
502 			goto done2;
503 		if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) {
504 			error = priv_check(td, PRIV_IPC_MSGSIZE);
505 			if (error)
506 				goto done2;
507 		}
508 		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
509 			DPRINTF(("can't increase msg_qbytes beyond %d"
510 			    "(truncating)\n", msginfo.msgmnb));
511 			msqbuf->msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
512 		}
513 		if (msqbuf->msg_qbytes == 0) {
514 			DPRINTF(("can't reduce msg_qbytes to 0\n"));
515 			error = EINVAL;		/* non-standard errno! */
516 			goto done2;
517 		}
518 		msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid;	/* change the owner */
519 		msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid;	/* change the owner */
520 		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
521 		    (msqbuf->msg_perm.mode & 0777);
522 		msqkptr->u.msg_qbytes = msqbuf->msg_qbytes;
523 		msqkptr->u.msg_ctime = time_second;
524 		break;
525 
526 	case IPC_STAT:
527 		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
528 			DPRINTF(("requester doesn't have read access\n"));
529 			goto done2;
530 		}
531 		*msqbuf = msqkptr->u;
532 		break;
533 
534 	default:
535 		DPRINTF(("invalid command %d\n", cmd));
536 		error = EINVAL;
537 		goto done2;
538 	}
539 
540 	if (error == 0)
541 		td->td_retval[0] = rval;
542 done2:
543 	mtx_unlock(&msq_mtx);
544 	return (error);
545 }
546 
547 #ifndef _SYS_SYSPROTO_H_
548 struct msgget_args {
549 	key_t	key;
550 	int	msgflg;
551 };
552 #endif
553 
554 int
555 sys_msgget(td, uap)
556 	struct thread *td;
557 	register struct msgget_args *uap;
558 {
559 	int msqid, error = 0;
560 	int key = uap->key;
561 	int msgflg = uap->msgflg;
562 	struct ucred *cred = td->td_ucred;
563 	register struct msqid_kernel *msqkptr = NULL;
564 
565 	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
566 
567 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
568 		return (ENOSYS);
569 
570 	mtx_lock(&msq_mtx);
571 	if (key != IPC_PRIVATE) {
572 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
573 			msqkptr = &msqids[msqid];
574 			if (msqkptr->u.msg_qbytes != 0 &&
575 			    msqkptr->u.msg_perm.key == key)
576 				break;
577 		}
578 		if (msqid < msginfo.msgmni) {
579 			DPRINTF(("found public key\n"));
580 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
581 				DPRINTF(("not exclusive\n"));
582 				error = EEXIST;
583 				goto done2;
584 			}
585 			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
586 			    msgflg & 0700))) {
587 				DPRINTF(("requester doesn't have 0%o access\n",
588 				    msgflg & 0700));
589 				goto done2;
590 			}
591 #ifdef MAC
592 			error = mac_sysvmsq_check_msqget(cred, msqkptr);
593 			if (error != 0)
594 				goto done2;
595 #endif
596 			goto found;
597 		}
598 	}
599 
600 	DPRINTF(("need to allocate the msqid_ds\n"));
601 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
602 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
603 			/*
604 			 * Look for an unallocated and unlocked msqid_ds.
605 			 * msqid_ds's can be locked by msgsnd or msgrcv while
606 			 * they are copying the message in/out.  We can't
607 			 * re-use the entry until they release it.
608 			 */
609 			msqkptr = &msqids[msqid];
610 			if (msqkptr->u.msg_qbytes == 0 &&
611 			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
612 				break;
613 		}
614 		if (msqid == msginfo.msgmni) {
615 			DPRINTF(("no more msqid_ds's available\n"));
616 			error = ENOSPC;
617 			goto done2;
618 		}
619 #ifdef RACCT
620 		PROC_LOCK(td->td_proc);
621 		error = racct_add(td->td_proc, RACCT_NMSGQ, 1);
622 		PROC_UNLOCK(td->td_proc);
623 		if (error != 0) {
624 			error = ENOSPC;
625 			goto done2;
626 		}
627 #endif
628 		DPRINTF(("msqid %d is available\n", msqid));
629 		msqkptr->u.msg_perm.key = key;
630 		msqkptr->u.msg_perm.cuid = cred->cr_uid;
631 		msqkptr->u.msg_perm.uid = cred->cr_uid;
632 		msqkptr->u.msg_perm.cgid = cred->cr_gid;
633 		msqkptr->u.msg_perm.gid = cred->cr_gid;
634 		msqkptr->u.msg_perm.mode = (msgflg & 0777);
635 		msqkptr->cred = crhold(cred);
636 		/* Make sure that the returned msqid is unique */
637 		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
638 		msqkptr->u.msg_first = NULL;
639 		msqkptr->u.msg_last = NULL;
640 		msqkptr->u.msg_cbytes = 0;
641 		msqkptr->u.msg_qnum = 0;
642 		msqkptr->u.msg_qbytes = msginfo.msgmnb;
643 		msqkptr->u.msg_lspid = 0;
644 		msqkptr->u.msg_lrpid = 0;
645 		msqkptr->u.msg_stime = 0;
646 		msqkptr->u.msg_rtime = 0;
647 		msqkptr->u.msg_ctime = time_second;
648 #ifdef MAC
649 		mac_sysvmsq_create(cred, msqkptr);
650 #endif
651 	} else {
652 		DPRINTF(("didn't find it and wasn't asked to create it\n"));
653 		error = ENOENT;
654 		goto done2;
655 	}
656 
657 found:
658 	/* Construct the unique msqid */
659 	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
660 done2:
661 	mtx_unlock(&msq_mtx);
662 	return (error);
663 }
664 
665 #ifndef _SYS_SYSPROTO_H_
666 struct msgsnd_args {
667 	int	msqid;
668 	const void	*msgp;
669 	size_t	msgsz;
670 	int	msgflg;
671 };
672 #endif
673 int
674 kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
675 	struct thread *td;
676 	int msqid;
677 	const void *msgp;	/* XXX msgp is actually mtext. */
678 	size_t msgsz;
679 	int msgflg;
680 	long mtype;
681 {
682 	int msqix, segs_needed, error = 0;
683 	register struct msqid_kernel *msqkptr;
684 	register struct msg *msghdr;
685 	short next;
686 #ifdef RACCT
687 	size_t saved_msgsz;
688 #endif
689 
690 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
691 		return (ENOSYS);
692 
693 	mtx_lock(&msq_mtx);
694 	msqix = IPCID_TO_IX(msqid);
695 
696 	if (msqix < 0 || msqix >= msginfo.msgmni) {
697 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
698 		    msginfo.msgmni));
699 		error = EINVAL;
700 		goto done2;
701 	}
702 
703 	msqkptr = &msqids[msqix];
704 	if (msqkptr->u.msg_qbytes == 0) {
705 		DPRINTF(("no such message queue id\n"));
706 		error = EINVAL;
707 		goto done2;
708 	}
709 	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
710 		DPRINTF(("wrong sequence number\n"));
711 		error = EINVAL;
712 		goto done2;
713 	}
714 
715 	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
716 		DPRINTF(("requester doesn't have write access\n"));
717 		goto done2;
718 	}
719 
720 #ifdef MAC
721 	error = mac_sysvmsq_check_msqsnd(td->td_ucred, msqkptr);
722 	if (error != 0)
723 		goto done2;
724 #endif
725 
726 #ifdef RACCT
727 	PROC_LOCK(td->td_proc);
728 	if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) {
729 		PROC_UNLOCK(td->td_proc);
730 		error = EAGAIN;
731 		goto done2;
732 	}
733 	saved_msgsz = msgsz;
734 	if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) {
735 		racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
736 		PROC_UNLOCK(td->td_proc);
737 		error = EAGAIN;
738 		goto done2;
739 	}
740 	PROC_UNLOCK(td->td_proc);
741 #endif
742 
743 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
744 	DPRINTF(("msgsz=%zu, msgssz=%d, segs_needed=%d\n", msgsz,
745 	    msginfo.msgssz, segs_needed));
746 	for (;;) {
747 		int need_more_resources = 0;
748 
749 		/*
750 		 * check msgsz
751 		 * (inside this loop in case msg_qbytes changes while we sleep)
752 		 */
753 
754 		if (msgsz > msqkptr->u.msg_qbytes) {
755 			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
756 			error = EINVAL;
757 			goto done3;
758 		}
759 
760 		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
761 			DPRINTF(("msqid is locked\n"));
762 			need_more_resources = 1;
763 		}
764 		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
765 			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
766 			need_more_resources = 1;
767 		}
768 		if (segs_needed > nfree_msgmaps) {
769 			DPRINTF(("segs_needed > nfree_msgmaps\n"));
770 			need_more_resources = 1;
771 		}
772 		if (free_msghdrs == NULL) {
773 			DPRINTF(("no more msghdrs\n"));
774 			need_more_resources = 1;
775 		}
776 
777 		if (need_more_resources) {
778 			int we_own_it;
779 
780 			if ((msgflg & IPC_NOWAIT) != 0) {
781 				DPRINTF(("need more resources but caller "
782 				    "doesn't want to wait\n"));
783 				error = EAGAIN;
784 				goto done3;
785 			}
786 
787 			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
788 				DPRINTF(("we don't own the msqid_ds\n"));
789 				we_own_it = 0;
790 			} else {
791 				/* Force later arrivals to wait for our
792 				   request */
793 				DPRINTF(("we own the msqid_ds\n"));
794 				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
795 				we_own_it = 1;
796 			}
797 			DPRINTF(("msgsnd:  goodnight\n"));
798 			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
799 			    "msgsnd", hz);
800 			DPRINTF(("msgsnd:  good morning, error=%d\n", error));
801 			if (we_own_it)
802 				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
803 			if (error == EWOULDBLOCK) {
804 				DPRINTF(("msgsnd:  timed out\n"));
805 				continue;
806 			}
807 			if (error != 0) {
808 				DPRINTF(("msgsnd:  interrupted system call\n"));
809 				error = EINTR;
810 				goto done3;
811 			}
812 
813 			/*
814 			 * Make sure that the msq queue still exists
815 			 */
816 
817 			if (msqkptr->u.msg_qbytes == 0) {
818 				DPRINTF(("msqid deleted\n"));
819 				error = EIDRM;
820 				goto done3;
821 			}
822 
823 		} else {
824 			DPRINTF(("got all the resources that we need\n"));
825 			break;
826 		}
827 	}
828 
829 	/*
830 	 * We have the resources that we need.
831 	 * Make sure!
832 	 */
833 
834 	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
835 		panic("msg_perm.mode & MSG_LOCKED");
836 	if (segs_needed > nfree_msgmaps)
837 		panic("segs_needed > nfree_msgmaps");
838 	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
839 		panic("msgsz + msg_cbytes > msg_qbytes");
840 	if (free_msghdrs == NULL)
841 		panic("no more msghdrs");
842 
843 	/*
844 	 * Re-lock the msqid_ds in case we page-fault when copying in the
845 	 * message
846 	 */
847 
848 	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
849 		panic("msqid_ds is already locked");
850 	msqkptr->u.msg_perm.mode |= MSG_LOCKED;
851 
852 	/*
853 	 * Allocate a message header
854 	 */
855 
856 	msghdr = free_msghdrs;
857 	free_msghdrs = msghdr->msg_next;
858 	msghdr->msg_spot = -1;
859 	msghdr->msg_ts = msgsz;
860 	msghdr->msg_type = mtype;
861 #ifdef MAC
862 	/*
863 	 * XXXMAC: Should the mac_sysvmsq_check_msgmsq check follow here
864 	 * immediately?  Or, should it be checked just before the msg is
865 	 * enqueued in the msgq (as it is done now)?
866 	 */
867 	mac_sysvmsg_create(td->td_ucred, msqkptr, msghdr);
868 #endif
869 
870 	/*
871 	 * Allocate space for the message
872 	 */
873 
874 	while (segs_needed > 0) {
875 		if (nfree_msgmaps <= 0)
876 			panic("not enough msgmaps");
877 		if (free_msgmaps == -1)
878 			panic("nil free_msgmaps");
879 		next = free_msgmaps;
880 		if (next <= -1)
881 			panic("next too low #1");
882 		if (next >= msginfo.msgseg)
883 			panic("next out of range #1");
884 		DPRINTF(("allocating segment %d to message\n", next));
885 		free_msgmaps = msgmaps[next].next;
886 		nfree_msgmaps--;
887 		msgmaps[next].next = msghdr->msg_spot;
888 		msghdr->msg_spot = next;
889 		segs_needed--;
890 	}
891 
892 	/*
893 	 * Validate the message type
894 	 */
895 
896 	if (msghdr->msg_type < 1) {
897 		msg_freehdr(msghdr);
898 		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
899 		wakeup(msqkptr);
900 		DPRINTF(("mtype (%ld) < 1\n", msghdr->msg_type));
901 		error = EINVAL;
902 		goto done3;
903 	}
904 
905 	/*
906 	 * Copy in the message body
907 	 */
908 
909 	next = msghdr->msg_spot;
910 	while (msgsz > 0) {
911 		size_t tlen;
912 		if (msgsz > msginfo.msgssz)
913 			tlen = msginfo.msgssz;
914 		else
915 			tlen = msgsz;
916 		if (next <= -1)
917 			panic("next too low #2");
918 		if (next >= msginfo.msgseg)
919 			panic("next out of range #2");
920 		mtx_unlock(&msq_mtx);
921 		if ((error = copyin(msgp, &msgpool[next * msginfo.msgssz],
922 		    tlen)) != 0) {
923 			mtx_lock(&msq_mtx);
924 			DPRINTF(("error %d copying in message segment\n",
925 			    error));
926 			msg_freehdr(msghdr);
927 			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
928 			wakeup(msqkptr);
929 			goto done3;
930 		}
931 		mtx_lock(&msq_mtx);
932 		msgsz -= tlen;
933 		msgp = (const char *)msgp + tlen;
934 		next = msgmaps[next].next;
935 	}
936 	if (next != -1)
937 		panic("didn't use all the msg segments");
938 
939 	/*
940 	 * We've got the message.  Unlock the msqid_ds.
941 	 */
942 
943 	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
944 
945 	/*
946 	 * Make sure that the msqid_ds is still allocated.
947 	 */
948 
949 	if (msqkptr->u.msg_qbytes == 0) {
950 		msg_freehdr(msghdr);
951 		wakeup(msqkptr);
952 		error = EIDRM;
953 		goto done3;
954 	}
955 
956 #ifdef MAC
957 	/*
958 	 * Note: Since the task/thread allocates the msghdr and usually
959 	 * primes it with its own MAC label, for a majority of policies, it
960 	 * won't be necessary to check whether the msghdr has access
961 	 * permissions to the msgq.  The mac_sysvmsq_check_msqsnd check would
962 	 * suffice in that case.  However, this hook may be required where
963 	 * individual policies derive a non-identical label for the msghdr
964 	 * from the current thread label and may want to check the msghdr
965 	 * enqueue permissions, along with read/write permissions to the
966 	 * msgq.
967 	 */
968 	error = mac_sysvmsq_check_msgmsq(td->td_ucred, msghdr, msqkptr);
969 	if (error != 0) {
970 		msg_freehdr(msghdr);
971 		wakeup(msqkptr);
972 		goto done3;
973 	}
974 #endif
975 
976 	/*
977 	 * Put the message into the queue
978 	 */
979 	if (msqkptr->u.msg_first == NULL) {
980 		msqkptr->u.msg_first = msghdr;
981 		msqkptr->u.msg_last = msghdr;
982 	} else {
983 		msqkptr->u.msg_last->msg_next = msghdr;
984 		msqkptr->u.msg_last = msghdr;
985 	}
986 	msqkptr->u.msg_last->msg_next = NULL;
987 
988 	msqkptr->u.msg_cbytes += msghdr->msg_ts;
989 	msqkptr->u.msg_qnum++;
990 	msqkptr->u.msg_lspid = td->td_proc->p_pid;
991 	msqkptr->u.msg_stime = time_second;
992 
993 	wakeup(msqkptr);
994 	td->td_retval[0] = 0;
995 done3:
996 #ifdef RACCT
997 	if (error != 0) {
998 		PROC_LOCK(td->td_proc);
999 		racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
1000 		racct_sub(td->td_proc, RACCT_MSGQSIZE, saved_msgsz);
1001 		PROC_UNLOCK(td->td_proc);
1002 	}
1003 #endif
1004 done2:
1005 	mtx_unlock(&msq_mtx);
1006 	return (error);
1007 }
1008 
1009 int
1010 sys_msgsnd(td, uap)
1011 	struct thread *td;
1012 	register struct msgsnd_args *uap;
1013 {
1014 	int error;
1015 	long mtype;
1016 
1017 	DPRINTF(("call to msgsnd(%d, %p, %zu, %d)\n", uap->msqid, uap->msgp,
1018 	    uap->msgsz, uap->msgflg));
1019 
1020 	if ((error = copyin(uap->msgp, &mtype, sizeof(mtype))) != 0) {
1021 		DPRINTF(("error %d copying the message type\n", error));
1022 		return (error);
1023 	}
1024 	return (kern_msgsnd(td, uap->msqid,
1025 	    (const char *)uap->msgp + sizeof(mtype),
1026 	    uap->msgsz, uap->msgflg, mtype));
1027 }
1028 
1029 #ifndef _SYS_SYSPROTO_H_
1030 struct msgrcv_args {
1031 	int	msqid;
1032 	void	*msgp;
1033 	size_t	msgsz;
1034 	long	msgtyp;
1035 	int	msgflg;
1036 };
1037 #endif
1038 int
1039 kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype)
1040 	struct thread *td;
1041 	int msqid;
1042 	void *msgp;	/* XXX msgp is actually mtext. */
1043 	size_t msgsz;
1044 	long msgtyp;
1045 	int msgflg;
1046 	long *mtype;
1047 {
1048 	size_t len;
1049 	register struct msqid_kernel *msqkptr;
1050 	register struct msg *msghdr;
1051 	int msqix, error = 0;
1052 	short next;
1053 
1054 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
1055 		return (ENOSYS);
1056 
1057 	msqix = IPCID_TO_IX(msqid);
1058 
1059 	if (msqix < 0 || msqix >= msginfo.msgmni) {
1060 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
1061 		    msginfo.msgmni));
1062 		return (EINVAL);
1063 	}
1064 
1065 	msqkptr = &msqids[msqix];
1066 	mtx_lock(&msq_mtx);
1067 	if (msqkptr->u.msg_qbytes == 0) {
1068 		DPRINTF(("no such message queue id\n"));
1069 		error = EINVAL;
1070 		goto done2;
1071 	}
1072 	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
1073 		DPRINTF(("wrong sequence number\n"));
1074 		error = EINVAL;
1075 		goto done2;
1076 	}
1077 
1078 	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
1079 		DPRINTF(("requester doesn't have read access\n"));
1080 		goto done2;
1081 	}
1082 
1083 #ifdef MAC
1084 	error = mac_sysvmsq_check_msqrcv(td->td_ucred, msqkptr);
1085 	if (error != 0)
1086 		goto done2;
1087 #endif
1088 
1089 	msghdr = NULL;
1090 	while (msghdr == NULL) {
1091 		if (msgtyp == 0) {
1092 			msghdr = msqkptr->u.msg_first;
1093 			if (msghdr != NULL) {
1094 				if (msgsz < msghdr->msg_ts &&
1095 				    (msgflg & MSG_NOERROR) == 0) {
1096 					DPRINTF(("first message on the queue "
1097 					    "is too big (want %zu, got %d)\n",
1098 					    msgsz, msghdr->msg_ts));
1099 					error = E2BIG;
1100 					goto done2;
1101 				}
1102 #ifdef MAC
1103 				error = mac_sysvmsq_check_msgrcv(td->td_ucred,
1104 				    msghdr);
1105 				if (error != 0)
1106 					goto done2;
1107 #endif
1108 				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
1109 					msqkptr->u.msg_first = NULL;
1110 					msqkptr->u.msg_last = NULL;
1111 				} else {
1112 					msqkptr->u.msg_first = msghdr->msg_next;
1113 					if (msqkptr->u.msg_first == NULL)
1114 						panic("msg_first/last screwed up #1");
1115 				}
1116 			}
1117 		} else {
1118 			struct msg *previous;
1119 			struct msg **prev;
1120 
1121 			previous = NULL;
1122 			prev = &(msqkptr->u.msg_first);
1123 			while ((msghdr = *prev) != NULL) {
1124 				/*
1125 				 * Is this message's type an exact match or is
1126 				 * this message's type less than or equal to
1127 				 * the absolute value of a negative msgtyp?
1128 				 * Note that the second half of this test can
1129 				 * NEVER be true if msgtyp is positive since
1130 				 * msg_type is always positive!
1131 				 */
1132 
1133 				if (msgtyp == msghdr->msg_type ||
1134 				    msghdr->msg_type <= -msgtyp) {
1135 					DPRINTF(("found message type %ld, "
1136 					    "requested %ld\n",
1137 					    msghdr->msg_type, msgtyp));
1138 					if (msgsz < msghdr->msg_ts &&
1139 					    (msgflg & MSG_NOERROR) == 0) {
1140 						DPRINTF(("requested message "
1141 						    "on the queue is too big "
1142 						    "(want %zu, got %hu)\n",
1143 						    msgsz, msghdr->msg_ts));
1144 						error = E2BIG;
1145 						goto done2;
1146 					}
1147 #ifdef MAC
1148 					error = mac_sysvmsq_check_msgrcv(
1149 					    td->td_ucred, msghdr);
1150 					if (error != 0)
1151 						goto done2;
1152 #endif
1153 					*prev = msghdr->msg_next;
1154 					if (msghdr == msqkptr->u.msg_last) {
1155 						if (previous == NULL) {
1156 							if (prev !=
1157 							    &msqkptr->u.msg_first)
1158 								panic("msg_first/last screwed up #2");
1159 							msqkptr->u.msg_first =
1160 							    NULL;
1161 							msqkptr->u.msg_last =
1162 							    NULL;
1163 						} else {
1164 							if (prev ==
1165 							    &msqkptr->u.msg_first)
1166 								panic("msg_first/last screwed up #3");
1167 							msqkptr->u.msg_last =
1168 							    previous;
1169 						}
1170 					}
1171 					break;
1172 				}
1173 				previous = msghdr;
1174 				prev = &(msghdr->msg_next);
1175 			}
1176 		}
1177 
1178 		/*
1179 		 * We've either extracted the msghdr for the appropriate
1180 		 * message or there isn't one.
1181 		 * If there is one then bail out of this loop.
1182 		 */
1183 
1184 		if (msghdr != NULL)
1185 			break;
1186 
1187 		/*
1188 		 * Hmph!  No message found.  Does the user want to wait?
1189 		 */
1190 
1191 		if ((msgflg & IPC_NOWAIT) != 0) {
1192 			DPRINTF(("no appropriate message found (msgtyp=%ld)\n",
1193 			    msgtyp));
1194 			/* The SVID says to return ENOMSG. */
1195 			error = ENOMSG;
1196 			goto done2;
1197 		}
1198 
1199 		/*
1200 		 * Wait for something to happen
1201 		 */
1202 
1203 		DPRINTF(("msgrcv:  goodnight\n"));
1204 		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
1205 		    "msgrcv", 0);
1206 		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1207 
1208 		if (error != 0) {
1209 			DPRINTF(("msgrcv:  interrupted system call\n"));
1210 			error = EINTR;
1211 			goto done2;
1212 		}
1213 
1214 		/*
1215 		 * Make sure that the msq queue still exists
1216 		 */
1217 
1218 		if (msqkptr->u.msg_qbytes == 0 ||
1219 		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
1220 			DPRINTF(("msqid deleted\n"));
1221 			error = EIDRM;
1222 			goto done2;
1223 		}
1224 	}
1225 
1226 	/*
1227 	 * Return the message to the user.
1228 	 *
1229 	 * First, do the bookkeeping (before we risk being interrupted).
1230 	 */
1231 
1232 	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
1233 	msqkptr->u.msg_qnum--;
1234 	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
1235 	msqkptr->u.msg_rtime = time_second;
1236 
1237 	racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, 1);
1238 	racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msghdr->msg_ts);
1239 
1240 	/*
1241 	 * Make msgsz the actual amount that we'll be returning.
1242 	 * Note that this effectively truncates the message if it is too long
1243 	 * (since msgsz is never increased).
1244 	 */
1245 
1246 	DPRINTF(("found a message, msgsz=%zu, msg_ts=%hu\n", msgsz,
1247 	    msghdr->msg_ts));
1248 	if (msgsz > msghdr->msg_ts)
1249 		msgsz = msghdr->msg_ts;
1250 	*mtype = msghdr->msg_type;
1251 
1252 	/*
1253 	 * Return the segments to the user
1254 	 */
1255 
1256 	next = msghdr->msg_spot;
1257 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1258 		size_t tlen;
1259 
1260 		if (msgsz - len > msginfo.msgssz)
1261 			tlen = msginfo.msgssz;
1262 		else
1263 			tlen = msgsz - len;
1264 		if (next <= -1)
1265 			panic("next too low #3");
1266 		if (next >= msginfo.msgseg)
1267 			panic("next out of range #3");
1268 		mtx_unlock(&msq_mtx);
1269 		error = copyout(&msgpool[next * msginfo.msgssz], msgp, tlen);
1270 		mtx_lock(&msq_mtx);
1271 		if (error != 0) {
1272 			DPRINTF(("error (%d) copying out message segment\n",
1273 			    error));
1274 			msg_freehdr(msghdr);
1275 			wakeup(msqkptr);
1276 			goto done2;
1277 		}
1278 		msgp = (char *)msgp + tlen;
1279 		next = msgmaps[next].next;
1280 	}
1281 
1282 	/*
1283 	 * Done, return the actual number of bytes copied out.
1284 	 */
1285 
1286 	msg_freehdr(msghdr);
1287 	wakeup(msqkptr);
1288 	td->td_retval[0] = msgsz;
1289 done2:
1290 	mtx_unlock(&msq_mtx);
1291 	return (error);
1292 }
1293 
1294 int
1295 sys_msgrcv(td, uap)
1296 	struct thread *td;
1297 	register struct msgrcv_args *uap;
1298 {
1299 	int error;
1300 	long mtype;
1301 
1302 	DPRINTF(("call to msgrcv(%d, %p, %zu, %ld, %d)\n", uap->msqid,
1303 	    uap->msgp, uap->msgsz, uap->msgtyp, uap->msgflg));
1304 
1305 	if ((error = kern_msgrcv(td, uap->msqid,
1306 	    (char *)uap->msgp + sizeof(mtype), uap->msgsz,
1307 	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
1308 		return (error);
1309 	if ((error = copyout(&mtype, uap->msgp, sizeof(mtype))) != 0)
1310 		DPRINTF(("error %d copying the message type\n", error));
1311 	return (error);
1312 }
1313 
1314 static int
1315 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1316 {
1317 
1318 	return (SYSCTL_OUT(req, msqids,
1319 	    sizeof(struct msqid_kernel) * msginfo.msgmni));
1320 }
1321 
1322 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
1323     "Maximum message size");
1324 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0,
1325     "Number of message queue identifiers");
1326 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0,
1327     "Maximum number of bytes in a queue");
1328 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0,
1329     "Maximum number of messages in the system");
1330 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0,
1331     "Size of a message segment");
1332 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
1333     "Number of message segments");
1334 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLTYPE_OPAQUE | CTLFLAG_RD,
1335     NULL, 0, sysctl_msqids, "", "Message queue IDs");
1336 
1337 #ifdef COMPAT_FREEBSD32
1338 int
1339 freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap)
1340 {
1341 
1342 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1343     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1344 	switch (uap->which) {
1345 	case 0:
1346 		return (freebsd7_freebsd32_msgctl(td,
1347 		    (struct freebsd7_freebsd32_msgctl_args *)&uap->a2));
1348 	case 2:
1349 		return (freebsd32_msgsnd(td,
1350 		    (struct freebsd32_msgsnd_args *)&uap->a2));
1351 	case 3:
1352 		return (freebsd32_msgrcv(td,
1353 		    (struct freebsd32_msgrcv_args *)&uap->a2));
1354 	default:
1355 		return (sys_msgsys(td, (struct msgsys_args *)uap));
1356 	}
1357 #else
1358 	return (nosys(td, NULL));
1359 #endif
1360 }
1361 
1362 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1363     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1364 int
1365 freebsd7_freebsd32_msgctl(struct thread *td,
1366     struct freebsd7_freebsd32_msgctl_args *uap)
1367 {
1368 	struct msqid_ds msqbuf;
1369 	struct msqid_ds32_old msqbuf32;
1370 	int error;
1371 
1372 	if (uap->cmd == IPC_SET) {
1373 		error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32));
1374 		if (error)
1375 			return (error);
1376 		freebsd32_ipcperm_old_in(&msqbuf32.msg_perm, &msqbuf.msg_perm);
1377 		PTRIN_CP(msqbuf32, msqbuf, msg_first);
1378 		PTRIN_CP(msqbuf32, msqbuf, msg_last);
1379 		CP(msqbuf32, msqbuf, msg_cbytes);
1380 		CP(msqbuf32, msqbuf, msg_qnum);
1381 		CP(msqbuf32, msqbuf, msg_qbytes);
1382 		CP(msqbuf32, msqbuf, msg_lspid);
1383 		CP(msqbuf32, msqbuf, msg_lrpid);
1384 		CP(msqbuf32, msqbuf, msg_stime);
1385 		CP(msqbuf32, msqbuf, msg_rtime);
1386 		CP(msqbuf32, msqbuf, msg_ctime);
1387 	}
1388 	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1389 	if (error)
1390 		return (error);
1391 	if (uap->cmd == IPC_STAT) {
1392 		bzero(&msqbuf32, sizeof(msqbuf32));
1393 		freebsd32_ipcperm_old_out(&msqbuf.msg_perm, &msqbuf32.msg_perm);
1394 		PTROUT_CP(msqbuf, msqbuf32, msg_first);
1395 		PTROUT_CP(msqbuf, msqbuf32, msg_last);
1396 		CP(msqbuf, msqbuf32, msg_cbytes);
1397 		CP(msqbuf, msqbuf32, msg_qnum);
1398 		CP(msqbuf, msqbuf32, msg_qbytes);
1399 		CP(msqbuf, msqbuf32, msg_lspid);
1400 		CP(msqbuf, msqbuf32, msg_lrpid);
1401 		CP(msqbuf, msqbuf32, msg_stime);
1402 		CP(msqbuf, msqbuf32, msg_rtime);
1403 		CP(msqbuf, msqbuf32, msg_ctime);
1404 		error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32));
1405 	}
1406 	return (error);
1407 }
1408 #endif
1409 
1410 int
1411 freebsd32_msgctl(struct thread *td, struct freebsd32_msgctl_args *uap)
1412 {
1413 	struct msqid_ds msqbuf;
1414 	struct msqid_ds32 msqbuf32;
1415 	int error;
1416 
1417 	if (uap->cmd == IPC_SET) {
1418 		error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32));
1419 		if (error)
1420 			return (error);
1421 		freebsd32_ipcperm_in(&msqbuf32.msg_perm, &msqbuf.msg_perm);
1422 		PTRIN_CP(msqbuf32, msqbuf, msg_first);
1423 		PTRIN_CP(msqbuf32, msqbuf, msg_last);
1424 		CP(msqbuf32, msqbuf, msg_cbytes);
1425 		CP(msqbuf32, msqbuf, msg_qnum);
1426 		CP(msqbuf32, msqbuf, msg_qbytes);
1427 		CP(msqbuf32, msqbuf, msg_lspid);
1428 		CP(msqbuf32, msqbuf, msg_lrpid);
1429 		CP(msqbuf32, msqbuf, msg_stime);
1430 		CP(msqbuf32, msqbuf, msg_rtime);
1431 		CP(msqbuf32, msqbuf, msg_ctime);
1432 	}
1433 	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1434 	if (error)
1435 		return (error);
1436 	if (uap->cmd == IPC_STAT) {
1437 		freebsd32_ipcperm_out(&msqbuf.msg_perm, &msqbuf32.msg_perm);
1438 		PTROUT_CP(msqbuf, msqbuf32, msg_first);
1439 		PTROUT_CP(msqbuf, msqbuf32, msg_last);
1440 		CP(msqbuf, msqbuf32, msg_cbytes);
1441 		CP(msqbuf, msqbuf32, msg_qnum);
1442 		CP(msqbuf, msqbuf32, msg_qbytes);
1443 		CP(msqbuf, msqbuf32, msg_lspid);
1444 		CP(msqbuf, msqbuf32, msg_lrpid);
1445 		CP(msqbuf, msqbuf32, msg_stime);
1446 		CP(msqbuf, msqbuf32, msg_rtime);
1447 		CP(msqbuf, msqbuf32, msg_ctime);
1448 		error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32));
1449 	}
1450 	return (error);
1451 }
1452 
1453 int
1454 freebsd32_msgsnd(struct thread *td, struct freebsd32_msgsnd_args *uap)
1455 {
1456 	const void *msgp;
1457 	long mtype;
1458 	int32_t mtype32;
1459 	int error;
1460 
1461 	msgp = PTRIN(uap->msgp);
1462 	if ((error = copyin(msgp, &mtype32, sizeof(mtype32))) != 0)
1463 		return (error);
1464 	mtype = mtype32;
1465 	return (kern_msgsnd(td, uap->msqid,
1466 	    (const char *)msgp + sizeof(mtype32),
1467 	    uap->msgsz, uap->msgflg, mtype));
1468 }
1469 
1470 int
1471 freebsd32_msgrcv(struct thread *td, struct freebsd32_msgrcv_args *uap)
1472 {
1473 	void *msgp;
1474 	long mtype;
1475 	int32_t mtype32;
1476 	int error;
1477 
1478 	msgp = PTRIN(uap->msgp);
1479 	if ((error = kern_msgrcv(td, uap->msqid,
1480 	    (char *)msgp + sizeof(mtype32), uap->msgsz,
1481 	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
1482 		return (error);
1483 	mtype32 = (int32_t)mtype;
1484 	return (copyout(&mtype32, msgp, sizeof(mtype32)));
1485 }
1486 #endif
1487 
1488 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1489     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1490 
1491 /* XXX casting to (sy_call_t *) is bogus, as usual. */
1492 static sy_call_t *msgcalls[] = {
1493 	(sy_call_t *)freebsd7_msgctl, (sy_call_t *)sys_msgget,
1494 	(sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv
1495 };
1496 
1497 /*
1498  * Entry point for all MSG calls.
1499  */
1500 int
1501 sys_msgsys(td, uap)
1502 	struct thread *td;
1503 	/* XXX actually varargs. */
1504 	struct msgsys_args /* {
1505 		int	which;
1506 		int	a2;
1507 		int	a3;
1508 		int	a4;
1509 		int	a5;
1510 		int	a6;
1511 	} */ *uap;
1512 {
1513 	int error;
1514 
1515 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
1516 		return (ENOSYS);
1517 	if (uap->which < 0 ||
1518 	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
1519 		return (EINVAL);
1520 	error = (*msgcalls[uap->which])(td, &uap->a2);
1521 	return (error);
1522 }
1523 
1524 #ifndef CP
1525 #define CP(src, dst, fld)	do { (dst).fld = (src).fld; } while (0)
1526 #endif
1527 
1528 #ifndef _SYS_SYSPROTO_H_
1529 struct freebsd7_msgctl_args {
1530 	int	msqid;
1531 	int	cmd;
1532 	struct	msqid_ds_old *buf;
1533 };
1534 #endif
1535 int
1536 freebsd7_msgctl(td, uap)
1537 	struct thread *td;
1538 	struct freebsd7_msgctl_args *uap;
1539 {
1540 	struct msqid_ds_old msqold;
1541 	struct msqid_ds msqbuf;
1542 	int error;
1543 
1544 	DPRINTF(("call to freebsd7_msgctl(%d, %d, %p)\n", uap->msqid, uap->cmd,
1545 	    uap->buf));
1546 	if (uap->cmd == IPC_SET) {
1547 		error = copyin(uap->buf, &msqold, sizeof(msqold));
1548 		if (error)
1549 			return (error);
1550 		ipcperm_old2new(&msqold.msg_perm, &msqbuf.msg_perm);
1551 		CP(msqold, msqbuf, msg_first);
1552 		CP(msqold, msqbuf, msg_last);
1553 		CP(msqold, msqbuf, msg_cbytes);
1554 		CP(msqold, msqbuf, msg_qnum);
1555 		CP(msqold, msqbuf, msg_qbytes);
1556 		CP(msqold, msqbuf, msg_lspid);
1557 		CP(msqold, msqbuf, msg_lrpid);
1558 		CP(msqold, msqbuf, msg_stime);
1559 		CP(msqold, msqbuf, msg_rtime);
1560 		CP(msqold, msqbuf, msg_ctime);
1561 	}
1562 	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1563 	if (error)
1564 		return (error);
1565 	if (uap->cmd == IPC_STAT) {
1566 		bzero(&msqold, sizeof(msqold));
1567 		ipcperm_new2old(&msqbuf.msg_perm, &msqold.msg_perm);
1568 		CP(msqbuf, msqold, msg_first);
1569 		CP(msqbuf, msqold, msg_last);
1570 		CP(msqbuf, msqold, msg_cbytes);
1571 		CP(msqbuf, msqold, msg_qnum);
1572 		CP(msqbuf, msqold, msg_qbytes);
1573 		CP(msqbuf, msqold, msg_lspid);
1574 		CP(msqbuf, msqold, msg_lrpid);
1575 		CP(msqbuf, msqold, msg_stime);
1576 		CP(msqbuf, msqold, msg_rtime);
1577 		CP(msqbuf, msqold, msg_ctime);
1578 		error = copyout(&msqold, uap->buf, sizeof(struct msqid_ds_old));
1579 	}
1580 	return (error);
1581 }
1582 
1583 #undef CP
1584 
1585 #endif	/* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 ||
1586 	   COMPAT_FREEBSD7 */
1587