xref: /freebsd/sys/kern/sysv_msg.c (revision b1f9167f94059fd55c630891d359bcff987bd7eb)
1 /*-
2  * Implementation of SVID messages
3  *
4  * Author:  Daniel Boulet
5  *
6  * Copyright 1993 Daniel Boulet and RTMX Inc.
7  *
8  * This system call was implemented by Daniel Boulet under contract from RTMX.
9  *
10  * Redistribution and use in source forms, with and without modification,
11  * are permitted provided that this entire comment appears intact.
12  *
13  * Redistribution in binary form may occur without any restrictions.
14  * Obviously, it would be nice if you gave credit where credit is due
15  * but requiring it would be too onerous.
16  *
17  * This software is provided ``AS IS'' without any warranties of any kind.
18  */
19 /*-
20  * Copyright (c) 2003-2005 McAfee, Inc.
21  * All rights reserved.
22  *
23  * This software was developed for the FreeBSD Project in part by McAfee
24  * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
25  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
26  * program.
27  *
28  * Redistribution and use in source and binary forms, with or without
29  * modification, are permitted provided that the following conditions
30  * are met:
31  * 1. Redistributions of source code must retain the above copyright
32  *    notice, this list of conditions and the following disclaimer.
33  * 2. Redistributions in binary form must reproduce the above copyright
34  *    notice, this list of conditions and the following disclaimer in the
35  *    documentation and/or other materials provided with the distribution.
36  *
37  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
38  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
41  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  */
49 
50 #include <sys/cdefs.h>
51 __FBSDID("$FreeBSD$");
52 
53 #include "opt_compat.h"
54 #include "opt_sysvipc.h"
55 
56 #include <sys/param.h>
57 #include <sys/systm.h>
58 #include <sys/sysproto.h>
59 #include <sys/kernel.h>
60 #include <sys/priv.h>
61 #include <sys/proc.h>
62 #include <sys/lock.h>
63 #include <sys/mutex.h>
64 #include <sys/module.h>
65 #include <sys/msg.h>
66 #include <sys/racct.h>
67 #include <sys/syscall.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysent.h>
70 #include <sys/sysctl.h>
71 #include <sys/malloc.h>
72 #include <sys/jail.h>
73 
74 #include <security/mac/mac_framework.h>
75 
76 FEATURE(sysv_msg, "System V message queues support");
77 
78 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
79 
80 static int msginit(void);
81 static int msgunload(void);
82 static int sysvmsg_modload(struct module *, int, void *);
83 
84 
85 #ifdef MSG_DEBUG
86 #define DPRINTF(a)	printf a
87 #else
88 #define DPRINTF(a)	(void)0
89 #endif
90 
91 static void msg_freehdr(struct msg *msghdr);
92 
93 #ifndef MSGSSZ
94 #define MSGSSZ	8		/* Each segment must be 2^N long */
95 #endif
96 #ifndef MSGSEG
97 #define MSGSEG	2048		/* must be less than 32767 */
98 #endif
99 #define MSGMAX	(MSGSSZ*MSGSEG)
100 #ifndef MSGMNB
101 #define MSGMNB	2048		/* max # of bytes in a queue */
102 #endif
103 #ifndef MSGMNI
104 #define MSGMNI	40
105 #endif
106 #ifndef MSGTQL
107 #define MSGTQL	40
108 #endif
109 
110 /*
111  * Based on the configuration parameters described in an SVR2 (yes, two)
112  * config(1m) man page.
113  *
114  * Each message is broken up and stored in segments that are msgssz bytes
115  * long.  For efficiency reasons, this should be a power of two.  Also,
116  * it doesn't make sense if it is less than 8 or greater than about 256.
117  * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
118  * two between 8 and 1024 inclusive (and panic's if it isn't).
119  */
120 struct msginfo msginfo = {
121                 MSGMAX,         /* max chars in a message */
122                 MSGMNI,         /* # of message queue identifiers */
123                 MSGMNB,         /* max chars in a queue */
124                 MSGTQL,         /* max messages in system */
125                 MSGSSZ,         /* size of a message segment */
126                 		/* (must be small power of 2 greater than 4) */
127                 MSGSEG          /* number of message segments */
128 };
129 
/*
 * Helpers that pack a queue slot index and the queue's sequence number
 * into a msqid and unpack them again (specific to this implementation).
 * The low 16 bits hold the index, the next 16 bits the sequence number.
 */
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
#define MSQID(ix,ds)	\
	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
137 
138 /*
139  * The rest of this file is specific to this particular implementation.
140  */
141 
/*
 * One map entry exists per message segment.  Entries are linked through
 * 'next', either on the free list or on the segment chain of a message.
 */
struct msgmap {
	short	next;		/* next segment in buffer */
				/* -1 -> available */
				/* 0..(MSGSEG-1) -> index of next segment */
};
147 
148 #define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
149 
150 static int nfree_msgmaps;	/* # of free map entries */
151 static short free_msgmaps;	/* head of linked list of free map entries */
152 static struct msg *free_msghdrs;/* list of free msg headers */
153 static char *msgpool;		/* MSGMAX byte long msg buffer pool */
154 static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
155 static struct msg *msghdrs;	/* MSGTQL msg headers */
156 static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
157 static struct mtx msq_mtx;	/* global mutex for message queues. */
158 
/*
 * Native system call entries provided by this module.  msginit() hands
 * the table to syscall_helper_register() and msgunload() passes it to
 * syscall_helper_unregister().
 */
static struct syscall_helper_data msg_syscalls[] = {
	SYSCALL_INIT_HELPER(msgctl),
	SYSCALL_INIT_HELPER(msgget),
	SYSCALL_INIT_HELPER(msgsnd),
	SYSCALL_INIT_HELPER(msgrcv),
	/* These two entries exist only when one of the COMPAT options is set. */
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
	SYSCALL_INIT_HELPER(msgsys),
	SYSCALL_INIT_HELPER_COMPAT(freebsd7_msgctl),
#endif
	SYSCALL_INIT_LAST
};
171 
172 #ifdef COMPAT_FREEBSD32
173 #include <compat/freebsd32/freebsd32.h>
174 #include <compat/freebsd32/freebsd32_ipc.h>
175 #include <compat/freebsd32/freebsd32_proto.h>
176 #include <compat/freebsd32/freebsd32_signal.h>
177 #include <compat/freebsd32/freebsd32_syscall.h>
178 #include <compat/freebsd32/freebsd32_util.h>
179 
/*
 * System call entries for the freebsd32 compatibility layer.  Only built
 * with COMPAT_FREEBSD32; msginit() registers the table through
 * syscall32_helper_register() and msgunload() unregisters it.
 */
static struct syscall_helper_data msg32_syscalls[] = {
	SYSCALL32_INIT_HELPER(freebsd32_msgctl),
	SYSCALL32_INIT_HELPER(freebsd32_msgsnd),
	SYSCALL32_INIT_HELPER(freebsd32_msgrcv),
	SYSCALL32_INIT_HELPER_COMPAT(msgget),
	SYSCALL32_INIT_HELPER(freebsd32_msgsys),
	/* Present only when one of the older-ABI COMPAT options is set. */
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
	SYSCALL32_INIT_HELPER(freebsd7_freebsd32_msgctl),
#endif
	SYSCALL_INIT_LAST
};
192 #endif
193 
194 static int
195 msginit()
196 {
197 	int i, error;
198 
199 	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
200 	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
201 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
202 	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
203 	TUNABLE_INT_FETCH("kern.ipc.msgmnb", &msginfo.msgmnb);
204 	TUNABLE_INT_FETCH("kern.ipc.msgtql", &msginfo.msgtql);
205 
206 	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
207 	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
208 	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
209 	msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG,
210 	    M_WAITOK);
211 
212 	/*
213 	 * msginfo.msgssz should be a power of two for efficiency reasons.
214 	 * It is also pretty silly if msginfo.msgssz is less than 8
215 	 * or greater than about 256 so ...
216 	 */
217 
218 	i = 8;
219 	while (i < 1024 && i != msginfo.msgssz)
220 		i <<= 1;
221     	if (i != msginfo.msgssz) {
222 		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
223 		    msginfo.msgssz));
224 		panic("msginfo.msgssz not a small power of 2");
225 	}
226 
227 	if (msginfo.msgseg > 32767) {
228 		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
229 		panic("msginfo.msgseg > 32767");
230 	}
231 
232 	for (i = 0; i < msginfo.msgseg; i++) {
233 		if (i > 0)
234 			msgmaps[i-1].next = i;
235 		msgmaps[i].next = -1;	/* implies entry is available */
236 	}
237 	free_msgmaps = 0;
238 	nfree_msgmaps = msginfo.msgseg;
239 
240 	for (i = 0; i < msginfo.msgtql; i++) {
241 		msghdrs[i].msg_type = 0;
242 		if (i > 0)
243 			msghdrs[i-1].msg_next = &msghdrs[i];
244 		msghdrs[i].msg_next = NULL;
245 #ifdef MAC
246 		mac_sysvmsg_init(&msghdrs[i]);
247 #endif
248     	}
249 	free_msghdrs = &msghdrs[0];
250 
251 	for (i = 0; i < msginfo.msgmni; i++) {
252 		msqids[i].u.msg_qbytes = 0;	/* implies entry is available */
253 		msqids[i].u.msg_perm.seq = 0;	/* reset to a known value */
254 		msqids[i].u.msg_perm.mode = 0;
255 #ifdef MAC
256 		mac_sysvmsq_init(&msqids[i]);
257 #endif
258 	}
259 	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
260 
261 	error = syscall_helper_register(msg_syscalls);
262 	if (error != 0)
263 		return (error);
264 #ifdef COMPAT_FREEBSD32
265 	error = syscall32_helper_register(msg32_syscalls);
266 	if (error != 0)
267 		return (error);
268 #endif
269 	return (0);
270 }
271 
272 static int
273 msgunload()
274 {
275 	struct msqid_kernel *msqkptr;
276 	int msqid;
277 #ifdef MAC
278 	int i;
279 #endif
280 
281 	syscall_helper_unregister(msg_syscalls);
282 #ifdef COMPAT_FREEBSD32
283 	syscall32_helper_unregister(msg32_syscalls);
284 #endif
285 
286 	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
287 		/*
288 		 * Look for an unallocated and unlocked msqid_ds.
289 		 * msqid_ds's can be locked by msgsnd or msgrcv while
290 		 * they are copying the message in/out.  We can't
291 		 * re-use the entry until they release it.
292 		 */
293 		msqkptr = &msqids[msqid];
294 		if (msqkptr->u.msg_qbytes != 0 ||
295 		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
296 			break;
297 	}
298 	if (msqid != msginfo.msgmni)
299 		return (EBUSY);
300 
301 #ifdef MAC
302 	for (i = 0; i < msginfo.msgtql; i++)
303 		mac_sysvmsg_destroy(&msghdrs[i]);
304 	for (msqid = 0; msqid < msginfo.msgmni; msqid++)
305 		mac_sysvmsq_destroy(&msqids[msqid]);
306 #endif
307 	free(msgpool, M_MSG);
308 	free(msgmaps, M_MSG);
309 	free(msghdrs, M_MSG);
310 	free(msqids, M_MSG);
311 	mtx_destroy(&msq_mtx);
312 	return (0);
313 }
314 
315 
316 static int
317 sysvmsg_modload(struct module *module, int cmd, void *arg)
318 {
319 	int error = 0;
320 
321 	switch (cmd) {
322 	case MOD_LOAD:
323 		error = msginit();
324 		if (error != 0)
325 			msgunload();
326 		break;
327 	case MOD_UNLOAD:
328 		error = msgunload();
329 		break;
330 	case MOD_SHUTDOWN:
331 		break;
332 	default:
333 		error = EINVAL;
334 		break;
335 	}
336 	return (error);
337 }
338 
339 static moduledata_t sysvmsg_mod = {
340 	"sysvmsg",
341 	&sysvmsg_modload,
342 	NULL
343 };
344 
345 DECLARE_MODULE(sysvmsg, sysvmsg_mod, SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
346 MODULE_VERSION(sysvmsg, 1);
347 
348 static void
349 msg_freehdr(msghdr)
350 	struct msg *msghdr;
351 {
352 	while (msghdr->msg_ts > 0) {
353 		short next;
354 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
355 			panic("msghdr->msg_spot out of range");
356 		next = msgmaps[msghdr->msg_spot].next;
357 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
358 		free_msgmaps = msghdr->msg_spot;
359 		nfree_msgmaps++;
360 		msghdr->msg_spot = next;
361 		if (msghdr->msg_ts >= msginfo.msgssz)
362 			msghdr->msg_ts -= msginfo.msgssz;
363 		else
364 			msghdr->msg_ts = 0;
365 	}
366 	if (msghdr->msg_spot != -1)
367 		panic("msghdr->msg_spot != -1");
368 	msghdr->msg_next = free_msghdrs;
369 	free_msghdrs = msghdr;
370 #ifdef MAC
371 	mac_sysvmsg_cleanup(msghdr);
372 #endif
373 }
374 
375 #ifndef _SYS_SYSPROTO_H_
376 struct msgctl_args {
377 	int	msqid;
378 	int	cmd;
379 	struct	msqid_ds *buf;
380 };
381 #endif
382 int
383 sys_msgctl(td, uap)
384 	struct thread *td;
385 	register struct msgctl_args *uap;
386 {
387 	int msqid = uap->msqid;
388 	int cmd = uap->cmd;
389 	struct msqid_ds msqbuf;
390 	int error;
391 
392 	DPRINTF(("call to msgctl(%d, %d, %p)\n", msqid, cmd, uap->buf));
393 	if (cmd == IPC_SET &&
394 	    (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0)
395 		return (error);
396 	error = kern_msgctl(td, msqid, cmd, &msqbuf);
397 	if (cmd == IPC_STAT && error == 0)
398 		error = copyout(&msqbuf, uap->buf, sizeof(struct msqid_ds));
399 	return (error);
400 }
401 
402 int
403 kern_msgctl(td, msqid, cmd, msqbuf)
404 	struct thread *td;
405 	int msqid;
406 	int cmd;
407 	struct msqid_ds *msqbuf;
408 {
409 	int rval, error, msqix;
410 	register struct msqid_kernel *msqkptr;
411 
412 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
413 		return (ENOSYS);
414 
415 	msqix = IPCID_TO_IX(msqid);
416 
417 	if (msqix < 0 || msqix >= msginfo.msgmni) {
418 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
419 		    msginfo.msgmni));
420 		return (EINVAL);
421 	}
422 
423 	msqkptr = &msqids[msqix];
424 
425 	mtx_lock(&msq_mtx);
426 	if (msqkptr->u.msg_qbytes == 0) {
427 		DPRINTF(("no such msqid\n"));
428 		error = EINVAL;
429 		goto done2;
430 	}
431 	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
432 		DPRINTF(("wrong sequence number\n"));
433 		error = EINVAL;
434 		goto done2;
435 	}
436 #ifdef MAC
437 	error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd);
438 	if (error != 0)
439 		goto done2;
440 #endif
441 
442 	error = 0;
443 	rval = 0;
444 
445 	switch (cmd) {
446 
447 	case IPC_RMID:
448 	{
449 		struct msg *msghdr;
450 		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
451 			goto done2;
452 
453 #ifdef MAC
454 		/*
455 		 * Check that the thread has MAC access permissions to
456 		 * individual msghdrs.  Note: We need to do this in a
457 		 * separate loop because the actual loop alters the
458 		 * msq/msghdr info as it progresses, and there is no going
459 		 * back if half the way through we discover that the
460 		 * thread cannot free a certain msghdr.  The msq will get
461 		 * into an inconsistent state.
462 		 */
463 		for (msghdr = msqkptr->u.msg_first; msghdr != NULL;
464 		    msghdr = msghdr->msg_next) {
465 			error = mac_sysvmsq_check_msgrmid(td->td_ucred, msghdr);
466 			if (error != 0)
467 				goto done2;
468 		}
469 #endif
470 
471 		racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
472 		racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
473 		racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
474 		crfree(msqkptr->cred);
475 		msqkptr->cred = NULL;
476 
477 		/* Free the message headers */
478 		msghdr = msqkptr->u.msg_first;
479 		while (msghdr != NULL) {
480 			struct msg *msghdr_tmp;
481 
482 			/* Free the segments of each message */
483 			msqkptr->u.msg_cbytes -= msghdr->msg_ts;
484 			msqkptr->u.msg_qnum--;
485 			msghdr_tmp = msghdr;
486 			msghdr = msghdr->msg_next;
487 			msg_freehdr(msghdr_tmp);
488 		}
489 
490 		if (msqkptr->u.msg_cbytes != 0)
491 			panic("msg_cbytes is screwed up");
492 		if (msqkptr->u.msg_qnum != 0)
493 			panic("msg_qnum is screwed up");
494 
495 		msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
496 
497 #ifdef MAC
498 		mac_sysvmsq_cleanup(msqkptr);
499 #endif
500 
501 		wakeup(msqkptr);
502 	}
503 
504 		break;
505 
506 	case IPC_SET:
507 		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
508 			goto done2;
509 		if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) {
510 			error = priv_check(td, PRIV_IPC_MSGSIZE);
511 			if (error)
512 				goto done2;
513 		}
514 		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
515 			DPRINTF(("can't increase msg_qbytes beyond %d"
516 			    "(truncating)\n", msginfo.msgmnb));
517 			msqbuf->msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
518 		}
519 		if (msqbuf->msg_qbytes == 0) {
520 			DPRINTF(("can't reduce msg_qbytes to 0\n"));
521 			error = EINVAL;		/* non-standard errno! */
522 			goto done2;
523 		}
524 		msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid;	/* change the owner */
525 		msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid;	/* change the owner */
526 		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
527 		    (msqbuf->msg_perm.mode & 0777);
528 		msqkptr->u.msg_qbytes = msqbuf->msg_qbytes;
529 		msqkptr->u.msg_ctime = time_second;
530 		break;
531 
532 	case IPC_STAT:
533 		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
534 			DPRINTF(("requester doesn't have read access\n"));
535 			goto done2;
536 		}
537 		*msqbuf = msqkptr->u;
538 		break;
539 
540 	default:
541 		DPRINTF(("invalid command %d\n", cmd));
542 		error = EINVAL;
543 		goto done2;
544 	}
545 
546 	if (error == 0)
547 		td->td_retval[0] = rval;
548 done2:
549 	mtx_unlock(&msq_mtx);
550 	return (error);
551 }
552 
553 #ifndef _SYS_SYSPROTO_H_
554 struct msgget_args {
555 	key_t	key;
556 	int	msgflg;
557 };
558 #endif
559 
560 int
561 sys_msgget(td, uap)
562 	struct thread *td;
563 	register struct msgget_args *uap;
564 {
565 	int msqid, error = 0;
566 	int key = uap->key;
567 	int msgflg = uap->msgflg;
568 	struct ucred *cred = td->td_ucred;
569 	register struct msqid_kernel *msqkptr = NULL;
570 
571 	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
572 
573 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
574 		return (ENOSYS);
575 
576 	mtx_lock(&msq_mtx);
577 	if (key != IPC_PRIVATE) {
578 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
579 			msqkptr = &msqids[msqid];
580 			if (msqkptr->u.msg_qbytes != 0 &&
581 			    msqkptr->u.msg_perm.key == key)
582 				break;
583 		}
584 		if (msqid < msginfo.msgmni) {
585 			DPRINTF(("found public key\n"));
586 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
587 				DPRINTF(("not exclusive\n"));
588 				error = EEXIST;
589 				goto done2;
590 			}
591 			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
592 			    msgflg & 0700))) {
593 				DPRINTF(("requester doesn't have 0%o access\n",
594 				    msgflg & 0700));
595 				goto done2;
596 			}
597 #ifdef MAC
598 			error = mac_sysvmsq_check_msqget(cred, msqkptr);
599 			if (error != 0)
600 				goto done2;
601 #endif
602 			goto found;
603 		}
604 	}
605 
606 	DPRINTF(("need to allocate the msqid_ds\n"));
607 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
608 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
609 			/*
610 			 * Look for an unallocated and unlocked msqid_ds.
611 			 * msqid_ds's can be locked by msgsnd or msgrcv while
612 			 * they are copying the message in/out.  We can't
613 			 * re-use the entry until they release it.
614 			 */
615 			msqkptr = &msqids[msqid];
616 			if (msqkptr->u.msg_qbytes == 0 &&
617 			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
618 				break;
619 		}
620 		if (msqid == msginfo.msgmni) {
621 			DPRINTF(("no more msqid_ds's available\n"));
622 			error = ENOSPC;
623 			goto done2;
624 		}
625 #ifdef RACCT
626 		PROC_LOCK(td->td_proc);
627 		error = racct_add(td->td_proc, RACCT_NMSGQ, 1);
628 		PROC_UNLOCK(td->td_proc);
629 		if (error != 0) {
630 			error = ENOSPC;
631 			goto done2;
632 		}
633 #endif
634 		DPRINTF(("msqid %d is available\n", msqid));
635 		msqkptr->u.msg_perm.key = key;
636 		msqkptr->u.msg_perm.cuid = cred->cr_uid;
637 		msqkptr->u.msg_perm.uid = cred->cr_uid;
638 		msqkptr->u.msg_perm.cgid = cred->cr_gid;
639 		msqkptr->u.msg_perm.gid = cred->cr_gid;
640 		msqkptr->u.msg_perm.mode = (msgflg & 0777);
641 		msqkptr->cred = crhold(cred);
642 		/* Make sure that the returned msqid is unique */
643 		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
644 		msqkptr->u.msg_first = NULL;
645 		msqkptr->u.msg_last = NULL;
646 		msqkptr->u.msg_cbytes = 0;
647 		msqkptr->u.msg_qnum = 0;
648 		msqkptr->u.msg_qbytes = msginfo.msgmnb;
649 		msqkptr->u.msg_lspid = 0;
650 		msqkptr->u.msg_lrpid = 0;
651 		msqkptr->u.msg_stime = 0;
652 		msqkptr->u.msg_rtime = 0;
653 		msqkptr->u.msg_ctime = time_second;
654 #ifdef MAC
655 		mac_sysvmsq_create(cred, msqkptr);
656 #endif
657 	} else {
658 		DPRINTF(("didn't find it and wasn't asked to create it\n"));
659 		error = ENOENT;
660 		goto done2;
661 	}
662 
663 found:
664 	/* Construct the unique msqid */
665 	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
666 done2:
667 	mtx_unlock(&msq_mtx);
668 	return (error);
669 }
670 
671 #ifndef _SYS_SYSPROTO_H_
672 struct msgsnd_args {
673 	int	msqid;
674 	const void	*msgp;
675 	size_t	msgsz;
676 	int	msgflg;
677 };
678 #endif
679 int
680 kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
681 	struct thread *td;
682 	int msqid;
683 	const void *msgp;	/* XXX msgp is actually mtext. */
684 	size_t msgsz;
685 	int msgflg;
686 	long mtype;
687 {
688 	int msqix, segs_needed, error = 0;
689 	register struct msqid_kernel *msqkptr;
690 	register struct msg *msghdr;
691 	short next;
692 #ifdef RACCT
693 	size_t saved_msgsz;
694 #endif
695 
696 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
697 		return (ENOSYS);
698 
699 	mtx_lock(&msq_mtx);
700 	msqix = IPCID_TO_IX(msqid);
701 
702 	if (msqix < 0 || msqix >= msginfo.msgmni) {
703 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
704 		    msginfo.msgmni));
705 		error = EINVAL;
706 		goto done2;
707 	}
708 
709 	msqkptr = &msqids[msqix];
710 	if (msqkptr->u.msg_qbytes == 0) {
711 		DPRINTF(("no such message queue id\n"));
712 		error = EINVAL;
713 		goto done2;
714 	}
715 	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
716 		DPRINTF(("wrong sequence number\n"));
717 		error = EINVAL;
718 		goto done2;
719 	}
720 
721 	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
722 		DPRINTF(("requester doesn't have write access\n"));
723 		goto done2;
724 	}
725 
726 #ifdef MAC
727 	error = mac_sysvmsq_check_msqsnd(td->td_ucred, msqkptr);
728 	if (error != 0)
729 		goto done2;
730 #endif
731 
732 #ifdef RACCT
733 	PROC_LOCK(td->td_proc);
734 	if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) {
735 		PROC_UNLOCK(td->td_proc);
736 		error = EAGAIN;
737 		goto done2;
738 	}
739 	saved_msgsz = msgsz;
740 	if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) {
741 		racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
742 		PROC_UNLOCK(td->td_proc);
743 		error = EAGAIN;
744 		goto done2;
745 	}
746 	PROC_UNLOCK(td->td_proc);
747 #endif
748 
749 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
750 	DPRINTF(("msgsz=%zu, msgssz=%d, segs_needed=%d\n", msgsz,
751 	    msginfo.msgssz, segs_needed));
752 	for (;;) {
753 		int need_more_resources = 0;
754 
755 		/*
756 		 * check msgsz
757 		 * (inside this loop in case msg_qbytes changes while we sleep)
758 		 */
759 
760 		if (msgsz > msqkptr->u.msg_qbytes) {
761 			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
762 			error = EINVAL;
763 			goto done3;
764 		}
765 
766 		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
767 			DPRINTF(("msqid is locked\n"));
768 			need_more_resources = 1;
769 		}
770 		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
771 			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
772 			need_more_resources = 1;
773 		}
774 		if (segs_needed > nfree_msgmaps) {
775 			DPRINTF(("segs_needed > nfree_msgmaps\n"));
776 			need_more_resources = 1;
777 		}
778 		if (free_msghdrs == NULL) {
779 			DPRINTF(("no more msghdrs\n"));
780 			need_more_resources = 1;
781 		}
782 
783 		if (need_more_resources) {
784 			int we_own_it;
785 
786 			if ((msgflg & IPC_NOWAIT) != 0) {
787 				DPRINTF(("need more resources but caller "
788 				    "doesn't want to wait\n"));
789 				error = EAGAIN;
790 				goto done3;
791 			}
792 
793 			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
794 				DPRINTF(("we don't own the msqid_ds\n"));
795 				we_own_it = 0;
796 			} else {
797 				/* Force later arrivals to wait for our
798 				   request */
799 				DPRINTF(("we own the msqid_ds\n"));
800 				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
801 				we_own_it = 1;
802 			}
803 			DPRINTF(("msgsnd:  goodnight\n"));
804 			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
805 			    "msgsnd", hz);
806 			DPRINTF(("msgsnd:  good morning, error=%d\n", error));
807 			if (we_own_it)
808 				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
809 			if (error == EWOULDBLOCK) {
810 				DPRINTF(("msgsnd:  timed out\n"));
811 				continue;
812 			}
813 			if (error != 0) {
814 				DPRINTF(("msgsnd:  interrupted system call\n"));
815 				error = EINTR;
816 				goto done3;
817 			}
818 
819 			/*
820 			 * Make sure that the msq queue still exists
821 			 */
822 
823 			if (msqkptr->u.msg_qbytes == 0) {
824 				DPRINTF(("msqid deleted\n"));
825 				error = EIDRM;
826 				goto done3;
827 			}
828 
829 		} else {
830 			DPRINTF(("got all the resources that we need\n"));
831 			break;
832 		}
833 	}
834 
835 	/*
836 	 * We have the resources that we need.
837 	 * Make sure!
838 	 */
839 
840 	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
841 		panic("msg_perm.mode & MSG_LOCKED");
842 	if (segs_needed > nfree_msgmaps)
843 		panic("segs_needed > nfree_msgmaps");
844 	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
845 		panic("msgsz + msg_cbytes > msg_qbytes");
846 	if (free_msghdrs == NULL)
847 		panic("no more msghdrs");
848 
849 	/*
850 	 * Re-lock the msqid_ds in case we page-fault when copying in the
851 	 * message
852 	 */
853 
854 	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
855 		panic("msqid_ds is already locked");
856 	msqkptr->u.msg_perm.mode |= MSG_LOCKED;
857 
858 	/*
859 	 * Allocate a message header
860 	 */
861 
862 	msghdr = free_msghdrs;
863 	free_msghdrs = msghdr->msg_next;
864 	msghdr->msg_spot = -1;
865 	msghdr->msg_ts = msgsz;
866 	msghdr->msg_type = mtype;
867 #ifdef MAC
868 	/*
869 	 * XXXMAC: Should the mac_sysvmsq_check_msgmsq check follow here
870 	 * immediately?  Or, should it be checked just before the msg is
871 	 * enqueued in the msgq (as it is done now)?
872 	 */
873 	mac_sysvmsg_create(td->td_ucred, msqkptr, msghdr);
874 #endif
875 
876 	/*
877 	 * Allocate space for the message
878 	 */
879 
880 	while (segs_needed > 0) {
881 		if (nfree_msgmaps <= 0)
882 			panic("not enough msgmaps");
883 		if (free_msgmaps == -1)
884 			panic("nil free_msgmaps");
885 		next = free_msgmaps;
886 		if (next <= -1)
887 			panic("next too low #1");
888 		if (next >= msginfo.msgseg)
889 			panic("next out of range #1");
890 		DPRINTF(("allocating segment %d to message\n", next));
891 		free_msgmaps = msgmaps[next].next;
892 		nfree_msgmaps--;
893 		msgmaps[next].next = msghdr->msg_spot;
894 		msghdr->msg_spot = next;
895 		segs_needed--;
896 	}
897 
898 	/*
899 	 * Validate the message type
900 	 */
901 
902 	if (msghdr->msg_type < 1) {
903 		msg_freehdr(msghdr);
904 		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
905 		wakeup(msqkptr);
906 		DPRINTF(("mtype (%ld) < 1\n", msghdr->msg_type));
907 		error = EINVAL;
908 		goto done3;
909 	}
910 
911 	/*
912 	 * Copy in the message body
913 	 */
914 
915 	next = msghdr->msg_spot;
916 	while (msgsz > 0) {
917 		size_t tlen;
918 		if (msgsz > msginfo.msgssz)
919 			tlen = msginfo.msgssz;
920 		else
921 			tlen = msgsz;
922 		if (next <= -1)
923 			panic("next too low #2");
924 		if (next >= msginfo.msgseg)
925 			panic("next out of range #2");
926 		mtx_unlock(&msq_mtx);
927 		if ((error = copyin(msgp, &msgpool[next * msginfo.msgssz],
928 		    tlen)) != 0) {
929 			mtx_lock(&msq_mtx);
930 			DPRINTF(("error %d copying in message segment\n",
931 			    error));
932 			msg_freehdr(msghdr);
933 			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
934 			wakeup(msqkptr);
935 			goto done3;
936 		}
937 		mtx_lock(&msq_mtx);
938 		msgsz -= tlen;
939 		msgp = (const char *)msgp + tlen;
940 		next = msgmaps[next].next;
941 	}
942 	if (next != -1)
943 		panic("didn't use all the msg segments");
944 
945 	/*
946 	 * We've got the message.  Unlock the msqid_ds.
947 	 */
948 
949 	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
950 
951 	/*
952 	 * Make sure that the msqid_ds is still allocated.
953 	 */
954 
955 	if (msqkptr->u.msg_qbytes == 0) {
956 		msg_freehdr(msghdr);
957 		wakeup(msqkptr);
958 		error = EIDRM;
959 		goto done3;
960 	}
961 
962 #ifdef MAC
963 	/*
964 	 * Note: Since the task/thread allocates the msghdr and usually
965 	 * primes it with its own MAC label, for a majority of policies, it
966 	 * won't be necessary to check whether the msghdr has access
967 	 * permissions to the msgq.  The mac_sysvmsq_check_msqsnd check would
968 	 * suffice in that case.  However, this hook may be required where
969 	 * individual policies derive a non-identical label for the msghdr
970 	 * from the current thread label and may want to check the msghdr
971 	 * enqueue permissions, along with read/write permissions to the
972 	 * msgq.
973 	 */
974 	error = mac_sysvmsq_check_msgmsq(td->td_ucred, msghdr, msqkptr);
975 	if (error != 0) {
976 		msg_freehdr(msghdr);
977 		wakeup(msqkptr);
978 		goto done3;
979 	}
980 #endif
981 
982 	/*
983 	 * Put the message into the queue
984 	 */
985 	if (msqkptr->u.msg_first == NULL) {
986 		msqkptr->u.msg_first = msghdr;
987 		msqkptr->u.msg_last = msghdr;
988 	} else {
989 		msqkptr->u.msg_last->msg_next = msghdr;
990 		msqkptr->u.msg_last = msghdr;
991 	}
992 	msqkptr->u.msg_last->msg_next = NULL;
993 
994 	msqkptr->u.msg_cbytes += msghdr->msg_ts;
995 	msqkptr->u.msg_qnum++;
996 	msqkptr->u.msg_lspid = td->td_proc->p_pid;
997 	msqkptr->u.msg_stime = time_second;
998 
999 	wakeup(msqkptr);
1000 	td->td_retval[0] = 0;
1001 done3:
1002 #ifdef RACCT
1003 	if (error != 0) {
1004 		PROC_LOCK(td->td_proc);
1005 		racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
1006 		racct_sub(td->td_proc, RACCT_MSGQSIZE, saved_msgsz);
1007 		PROC_UNLOCK(td->td_proc);
1008 	}
1009 #endif
1010 done2:
1011 	mtx_unlock(&msq_mtx);
1012 	return (error);
1013 }
1014 
1015 int
1016 sys_msgsnd(td, uap)
1017 	struct thread *td;
1018 	register struct msgsnd_args *uap;
1019 {
1020 	int error;
1021 	long mtype;
1022 
1023 	DPRINTF(("call to msgsnd(%d, %p, %zu, %d)\n", uap->msqid, uap->msgp,
1024 	    uap->msgsz, uap->msgflg));
1025 
1026 	if ((error = copyin(uap->msgp, &mtype, sizeof(mtype))) != 0) {
1027 		DPRINTF(("error %d copying the message type\n", error));
1028 		return (error);
1029 	}
1030 	return (kern_msgsnd(td, uap->msqid,
1031 	    (const char *)uap->msgp + sizeof(mtype),
1032 	    uap->msgsz, uap->msgflg, mtype));
1033 }
1034 
1035 #ifndef _SYS_SYSPROTO_H_
1036 struct msgrcv_args {
1037 	int	msqid;
1038 	void	*msgp;
1039 	size_t	msgsz;
1040 	long	msgtyp;
1041 	int	msgflg;
1042 };
1043 #endif
/*
 * Receive one message from the queue identified by msqid.
 *
 * msgp addresses the user buffer for the message text only (the callers in
 * this file skip over the leading type field before calling), and msgsz is
 * the most text bytes to return.  msgtyp selects the message: 0 takes the
 * head of the queue, any other value scans the queue in order for the first
 * message whose type equals msgtyp or is <= -msgtyp.  The type of the
 * dequeued message is stored through mtype, which is written directly and
 * therefore must be a kernel address.
 *
 * On success returns 0 and sets td->td_retval[0] to the number of text
 * bytes copied out.  Errors produced here: ENOSYS (SysV IPC not allowed by
 * the prison), EINVAL (index out of range, unused slot, or stale sequence
 * number), E2BIG (message longer than msgsz and MSG_NOERROR not set),
 * ENOMSG (nothing suitable and IPC_NOWAIT set), EINTR (the sleep returned
 * an error), EIDRM (queue gone after waking up), plus anything returned by
 * ipcperm(), the MAC checks, or copyout().
 */
int
kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype)
	struct thread *td;
	int msqid;
	void *msgp;	/* XXX msgp is actually mtext. */
	size_t msgsz;
	long msgtyp;
	int msgflg;
	long *mtype;
{
	size_t len;
	register struct msqid_kernel *msqkptr;
	register struct msg *msghdr;
	int msqix, error = 0;
	short next;

	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
		return (ENOSYS);

	/* Split the id into a table index; the sequence is checked below. */
	msqix = IPCID_TO_IX(msqid);

	if (msqix < 0 || msqix >= msginfo.msgmni) {
		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
		    msginfo.msgmni));
		return (EINVAL);
	}

	msqkptr = &msqids[msqix];
	mtx_lock(&msq_mtx);
	/* A zero msg_qbytes is treated as "this slot holds no queue". */
	if (msqkptr->u.msg_qbytes == 0) {
		DPRINTF(("no such message queue id\n"));
		error = EINVAL;
		goto done2;
	}
	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
		DPRINTF(("wrong sequence number\n"));
		error = EINVAL;
		goto done2;
	}

	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
		DPRINTF(("requester doesn't have read access\n"));
		goto done2;
	}

#ifdef MAC
	error = mac_sysvmsq_check_msqrcv(td->td_ucred, msqkptr);
	if (error != 0)
		goto done2;
#endif

	/*
	 * Look for a matching message; each pass either unlinks one from
	 * the queue, fails, or sleeps and tries again.
	 */
	msghdr = NULL;
	while (msghdr == NULL) {
		if (msgtyp == 0) {
			/* Type 0: take whatever is at the head of the queue. */
			msghdr = msqkptr->u.msg_first;
			if (msghdr != NULL) {
				if (msgsz < msghdr->msg_ts &&
				    (msgflg & MSG_NOERROR) == 0) {
					DPRINTF(("first message on the queue "
					    "is too big (want %zu, got %d)\n",
					    msgsz, msghdr->msg_ts));
					error = E2BIG;
					goto done2;
				}
#ifdef MAC
				error = mac_sysvmsq_check_msgrcv(td->td_ucred,
				    msghdr);
				if (error != 0)
					goto done2;
#endif
				/* Unlink the head; handle the one-entry queue. */
				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
					msqkptr->u.msg_first = NULL;
					msqkptr->u.msg_last = NULL;
				} else {
					msqkptr->u.msg_first = msghdr->msg_next;
					if (msqkptr->u.msg_first == NULL)
						panic("msg_first/last screwed up #1");
				}
			}
		} else {
			/*
			 * previous trails the scan by one message; prev is
			 * the link that points at the message being examined.
			 */
			struct msg *previous;
			struct msg **prev;

			previous = NULL;
			prev = &(msqkptr->u.msg_first);
			while ((msghdr = *prev) != NULL) {
				/*
				 * Is this message's type an exact match or is
				 * this message's type less than or equal to
				 * the absolute value of a negative msgtyp?
				 * Note that the second half of this test can
				 * NEVER be true if msgtyp is positive since
				 * msg_type is always positive!
				 */
				/*
				 * NOTE(review): the scan stops at the first
				 * message in queue order that passes this
				 * test.  For a negative msgtyp that is not
				 * necessarily the lowest-typed candidate --
				 * confirm against the msgrcv() specification
				 * whether first-match is intended.
				 */

				if (msgtyp == msghdr->msg_type ||
				    msghdr->msg_type <= -msgtyp) {
					DPRINTF(("found message type %ld, "
					    "requested %ld\n",
					    msghdr->msg_type, msgtyp));
					if (msgsz < msghdr->msg_ts &&
					    (msgflg & MSG_NOERROR) == 0) {
						DPRINTF(("requested message "
						    "on the queue is too big "
						    "(want %zu, got %hu)\n",
						    msgsz, msghdr->msg_ts));
						error = E2BIG;
						goto done2;
					}
#ifdef MAC
					error = mac_sysvmsq_check_msgrcv(
					    td->td_ucred, msghdr);
					if (error != 0)
						goto done2;
#endif
					/* Unlink, then repair msg_last if needed. */
					*prev = msghdr->msg_next;
					if (msghdr == msqkptr->u.msg_last) {
						if (previous == NULL) {
							if (prev !=
							    &msqkptr->u.msg_first)
								panic("msg_first/last screwed up #2");
							msqkptr->u.msg_first =
							    NULL;
							msqkptr->u.msg_last =
							    NULL;
						} else {
							if (prev ==
							    &msqkptr->u.msg_first)
								panic("msg_first/last screwed up #3");
							msqkptr->u.msg_last =
							    previous;
						}
					}
					break;
				}
				previous = msghdr;
				prev = &(msghdr->msg_next);
			}
		}

		/*
		 * We've either extracted the msghdr for the appropriate
		 * message or there isn't one.
		 * If there is one then bail out of this loop.
		 */

		if (msghdr != NULL)
			break;

		/*
		 * Hmph!  No message found.  Does the user want to wait?
		 */

		if ((msgflg & IPC_NOWAIT) != 0) {
			DPRINTF(("no appropriate message found (msgtyp=%ld)\n",
			    msgtyp));
			/* The SVID says to return ENOMSG. */
			error = ENOMSG;
			goto done2;
		}

		/*
		 * Wait for something to happen
		 */

		DPRINTF(("msgrcv:  goodnight\n"));
		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
		    "msgrcv", 0);
		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));

		/* Any non-zero msleep() result is reported as EINTR. */
		if (error != 0) {
			DPRINTF(("msgrcv:  interrupted system call\n"));
			error = EINTR;
			goto done2;
		}

		/*
		 * Make sure that the msq queue still exists
		 */

		if (msqkptr->u.msg_qbytes == 0 ||
		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
			DPRINTF(("msqid deleted\n"));
			error = EIDRM;
			goto done2;
		}
	}

	/*
	 * Return the message to the user.
	 *
	 * First, do the bookkeeping (before we risk being interrupted).
	 */

	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
	msqkptr->u.msg_qnum--;
	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
	msqkptr->u.msg_rtime = time_second;

	/* Give back the resource-accounting charge held against the queue. */
	racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, 1);
	racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msghdr->msg_ts);

	/*
	 * Make msgsz the actual amount that we'll be returning.
	 * Note that this effectively truncates the message if it is too long
	 * (since msgsz is never increased).
	 */

	DPRINTF(("found a message, msgsz=%zu, msg_ts=%hu\n", msgsz,
	    msghdr->msg_ts));
	if (msgsz > msghdr->msg_ts)
		msgsz = msghdr->msg_ts;
	*mtype = msghdr->msg_type;

	/*
	 * Return the segments to the user
	 */

	/* Follow the segment chain, copying at most msgssz bytes per step. */
	next = msghdr->msg_spot;
	for (len = 0; len < msgsz; len += msginfo.msgssz) {
		size_t tlen;

		if (msgsz - len > msginfo.msgssz)
			tlen = msginfo.msgssz;
		else
			tlen = msgsz - len;
		if (next <= -1)
			panic("next too low #3");
		if (next >= msginfo.msgseg)
			panic("next out of range #3");
		/*
		 * msq_mtx is released around copyout(), presumably because
		 * the copy may fault on user memory.  The message was
		 * already unlinked from the queue above.
		 */
		mtx_unlock(&msq_mtx);
		error = copyout(&msgpool[next * msginfo.msgssz], msgp, tlen);
		mtx_lock(&msq_mtx);
		if (error != 0) {
			DPRINTF(("error (%d) copying out message segment\n",
			    error));
			/*
			 * The header is still handed to msg_freehdr(); the
			 * dequeued message is not put back on the queue.
			 */
			msg_freehdr(msghdr);
			wakeup(msqkptr);
			goto done2;
		}
		msgp = (char *)msgp + tlen;
		next = msgmaps[next].next;
	}

	/*
	 * Done, return the actual number of bytes copied out.
	 */

	msg_freehdr(msghdr);
	/* Wake any thread sleeping on this queue. */
	wakeup(msqkptr);
	td->td_retval[0] = msgsz;
done2:
	mtx_unlock(&msq_mtx);
	return (error);
}
1299 
1300 int
1301 sys_msgrcv(td, uap)
1302 	struct thread *td;
1303 	register struct msgrcv_args *uap;
1304 {
1305 	int error;
1306 	long mtype;
1307 
1308 	DPRINTF(("call to msgrcv(%d, %p, %zu, %ld, %d)\n", uap->msqid,
1309 	    uap->msgp, uap->msgsz, uap->msgtyp, uap->msgflg));
1310 
1311 	if ((error = kern_msgrcv(td, uap->msqid,
1312 	    (char *)uap->msgp + sizeof(mtype), uap->msgsz,
1313 	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
1314 		return (error);
1315 	if ((error = copyout(&mtype, uap->msgp, sizeof(mtype))) != 0)
1316 		DPRINTF(("error %d copying the message type\n", error));
1317 	return (error);
1318 }
1319 
1320 static int
1321 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1322 {
1323 
1324 	return (SYSCTL_OUT(req, msqids,
1325 	    sizeof(struct msqid_kernel) * msginfo.msgmni));
1326 }
1327 
/*
 * kern.ipc sysctl nodes exposing the msginfo limits.  All are declared
 * with read-only flags (CTLFLAG_RD or CTLFLAG_RDTUN); msqids is an opaque
 * node served by sysctl_msqids().
 */
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
    "Maximum message size");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0,
    "Number of message queue identifiers");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0,
    "Maximum number of bytes in a queue");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0,
    "Maximum number of messages in the system");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0,
    "Size of a message segment");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
    "Number of message segments");
SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLTYPE_OPAQUE | CTLFLAG_RD,
    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1342 
1343 #ifdef COMPAT_FREEBSD32
1344 int
1345 freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap)
1346 {
1347 
1348 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1349     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1350 	switch (uap->which) {
1351 	case 0:
1352 		return (freebsd7_freebsd32_msgctl(td,
1353 		    (struct freebsd7_freebsd32_msgctl_args *)&uap->a2));
1354 	case 2:
1355 		return (freebsd32_msgsnd(td,
1356 		    (struct freebsd32_msgsnd_args *)&uap->a2));
1357 	case 3:
1358 		return (freebsd32_msgrcv(td,
1359 		    (struct freebsd32_msgrcv_args *)&uap->a2));
1360 	default:
1361 		return (sys_msgsys(td, (struct msgsys_args *)uap));
1362 	}
1363 #else
1364 	return (nosys(td, NULL));
1365 #endif
1366 }
1367 
1368 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1369     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1370 int
1371 freebsd7_freebsd32_msgctl(struct thread *td,
1372     struct freebsd7_freebsd32_msgctl_args *uap)
1373 {
1374 	struct msqid_ds msqbuf;
1375 	struct msqid_ds32_old msqbuf32;
1376 	int error;
1377 
1378 	if (uap->cmd == IPC_SET) {
1379 		error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32));
1380 		if (error)
1381 			return (error);
1382 		freebsd32_ipcperm_old_in(&msqbuf32.msg_perm, &msqbuf.msg_perm);
1383 		PTRIN_CP(msqbuf32, msqbuf, msg_first);
1384 		PTRIN_CP(msqbuf32, msqbuf, msg_last);
1385 		CP(msqbuf32, msqbuf, msg_cbytes);
1386 		CP(msqbuf32, msqbuf, msg_qnum);
1387 		CP(msqbuf32, msqbuf, msg_qbytes);
1388 		CP(msqbuf32, msqbuf, msg_lspid);
1389 		CP(msqbuf32, msqbuf, msg_lrpid);
1390 		CP(msqbuf32, msqbuf, msg_stime);
1391 		CP(msqbuf32, msqbuf, msg_rtime);
1392 		CP(msqbuf32, msqbuf, msg_ctime);
1393 	}
1394 	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1395 	if (error)
1396 		return (error);
1397 	if (uap->cmd == IPC_STAT) {
1398 		bzero(&msqbuf32, sizeof(msqbuf32));
1399 		freebsd32_ipcperm_old_out(&msqbuf.msg_perm, &msqbuf32.msg_perm);
1400 		PTROUT_CP(msqbuf, msqbuf32, msg_first);
1401 		PTROUT_CP(msqbuf, msqbuf32, msg_last);
1402 		CP(msqbuf, msqbuf32, msg_cbytes);
1403 		CP(msqbuf, msqbuf32, msg_qnum);
1404 		CP(msqbuf, msqbuf32, msg_qbytes);
1405 		CP(msqbuf, msqbuf32, msg_lspid);
1406 		CP(msqbuf, msqbuf32, msg_lrpid);
1407 		CP(msqbuf, msqbuf32, msg_stime);
1408 		CP(msqbuf, msqbuf32, msg_rtime);
1409 		CP(msqbuf, msqbuf32, msg_ctime);
1410 		error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32));
1411 	}
1412 	return (error);
1413 }
1414 #endif
1415 
1416 int
1417 freebsd32_msgctl(struct thread *td, struct freebsd32_msgctl_args *uap)
1418 {
1419 	struct msqid_ds msqbuf;
1420 	struct msqid_ds32 msqbuf32;
1421 	int error;
1422 
1423 	if (uap->cmd == IPC_SET) {
1424 		error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32));
1425 		if (error)
1426 			return (error);
1427 		freebsd32_ipcperm_in(&msqbuf32.msg_perm, &msqbuf.msg_perm);
1428 		PTRIN_CP(msqbuf32, msqbuf, msg_first);
1429 		PTRIN_CP(msqbuf32, msqbuf, msg_last);
1430 		CP(msqbuf32, msqbuf, msg_cbytes);
1431 		CP(msqbuf32, msqbuf, msg_qnum);
1432 		CP(msqbuf32, msqbuf, msg_qbytes);
1433 		CP(msqbuf32, msqbuf, msg_lspid);
1434 		CP(msqbuf32, msqbuf, msg_lrpid);
1435 		CP(msqbuf32, msqbuf, msg_stime);
1436 		CP(msqbuf32, msqbuf, msg_rtime);
1437 		CP(msqbuf32, msqbuf, msg_ctime);
1438 	}
1439 	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1440 	if (error)
1441 		return (error);
1442 	if (uap->cmd == IPC_STAT) {
1443 		freebsd32_ipcperm_out(&msqbuf.msg_perm, &msqbuf32.msg_perm);
1444 		PTROUT_CP(msqbuf, msqbuf32, msg_first);
1445 		PTROUT_CP(msqbuf, msqbuf32, msg_last);
1446 		CP(msqbuf, msqbuf32, msg_cbytes);
1447 		CP(msqbuf, msqbuf32, msg_qnum);
1448 		CP(msqbuf, msqbuf32, msg_qbytes);
1449 		CP(msqbuf, msqbuf32, msg_lspid);
1450 		CP(msqbuf, msqbuf32, msg_lrpid);
1451 		CP(msqbuf, msqbuf32, msg_stime);
1452 		CP(msqbuf, msqbuf32, msg_rtime);
1453 		CP(msqbuf, msqbuf32, msg_ctime);
1454 		error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32));
1455 	}
1456 	return (error);
1457 }
1458 
1459 int
1460 freebsd32_msgsnd(struct thread *td, struct freebsd32_msgsnd_args *uap)
1461 {
1462 	const void *msgp;
1463 	long mtype;
1464 	int32_t mtype32;
1465 	int error;
1466 
1467 	msgp = PTRIN(uap->msgp);
1468 	if ((error = copyin(msgp, &mtype32, sizeof(mtype32))) != 0)
1469 		return (error);
1470 	mtype = mtype32;
1471 	return (kern_msgsnd(td, uap->msqid,
1472 	    (const char *)msgp + sizeof(mtype32),
1473 	    uap->msgsz, uap->msgflg, mtype));
1474 }
1475 
1476 int
1477 freebsd32_msgrcv(struct thread *td, struct freebsd32_msgrcv_args *uap)
1478 {
1479 	void *msgp;
1480 	long mtype;
1481 	int32_t mtype32;
1482 	int error;
1483 
1484 	msgp = PTRIN(uap->msgp);
1485 	if ((error = kern_msgrcv(td, uap->msqid,
1486 	    (char *)msgp + sizeof(mtype32), uap->msgsz,
1487 	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
1488 		return (error);
1489 	mtype32 = (int32_t)mtype;
1490 	return (copyout(&mtype32, msgp, sizeof(mtype32)));
1491 }
1492 #endif
1493 
1494 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1495     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1496 
1497 /* XXX casting to (sy_call_t *) is bogus, as usual. */
1498 static sy_call_t *msgcalls[] = {
1499 	(sy_call_t *)freebsd7_msgctl, (sy_call_t *)sys_msgget,
1500 	(sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv
1501 };
1502 
1503 /*
1504  * Entry point for all MSG calls.
1505  */
1506 int
1507 sys_msgsys(td, uap)
1508 	struct thread *td;
1509 	/* XXX actually varargs. */
1510 	struct msgsys_args /* {
1511 		int	which;
1512 		int	a2;
1513 		int	a3;
1514 		int	a4;
1515 		int	a5;
1516 		int	a6;
1517 	} */ *uap;
1518 {
1519 	int error;
1520 
1521 	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
1522 		return (ENOSYS);
1523 	if (uap->which < 0 ||
1524 	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
1525 		return (EINVAL);
1526 	error = (*msgcalls[uap->which])(td, &uap->a2);
1527 	return (error);
1528 }
1529 
/*
 * Copy the member named fld from structure src to structure dst.
 * Defined here only when no definition of CP is already in effect.
 */
#ifndef CP
#define CP(src, dst, fld)	do { (dst).fld = (src).fld; } while (0)
#endif
1533 
/*
 * Argument block consumed by freebsd7_msgctl().  Declared here only when
 * _SYS_SYSPROTO_H_ has not been defined.
 */
#ifndef _SYS_SYSPROTO_H_
struct freebsd7_msgctl_args {
	int	msqid;		/* message queue identifier */
	int	cmd;		/* command; IPC_SET and IPC_STAT use buf */
	struct	msqid_ds_old *buf;	/* user buffer in the old layout */
};
#endif
1541 int
1542 freebsd7_msgctl(td, uap)
1543 	struct thread *td;
1544 	struct freebsd7_msgctl_args *uap;
1545 {
1546 	struct msqid_ds_old msqold;
1547 	struct msqid_ds msqbuf;
1548 	int error;
1549 
1550 	DPRINTF(("call to freebsd7_msgctl(%d, %d, %p)\n", uap->msqid, uap->cmd,
1551 	    uap->buf));
1552 	if (uap->cmd == IPC_SET) {
1553 		error = copyin(uap->buf, &msqold, sizeof(msqold));
1554 		if (error)
1555 			return (error);
1556 		ipcperm_old2new(&msqold.msg_perm, &msqbuf.msg_perm);
1557 		CP(msqold, msqbuf, msg_first);
1558 		CP(msqold, msqbuf, msg_last);
1559 		CP(msqold, msqbuf, msg_cbytes);
1560 		CP(msqold, msqbuf, msg_qnum);
1561 		CP(msqold, msqbuf, msg_qbytes);
1562 		CP(msqold, msqbuf, msg_lspid);
1563 		CP(msqold, msqbuf, msg_lrpid);
1564 		CP(msqold, msqbuf, msg_stime);
1565 		CP(msqold, msqbuf, msg_rtime);
1566 		CP(msqold, msqbuf, msg_ctime);
1567 	}
1568 	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1569 	if (error)
1570 		return (error);
1571 	if (uap->cmd == IPC_STAT) {
1572 		bzero(&msqold, sizeof(msqold));
1573 		ipcperm_new2old(&msqbuf.msg_perm, &msqold.msg_perm);
1574 		CP(msqbuf, msqold, msg_first);
1575 		CP(msqbuf, msqold, msg_last);
1576 		CP(msqbuf, msqold, msg_cbytes);
1577 		CP(msqbuf, msqold, msg_qnum);
1578 		CP(msqbuf, msqold, msg_qbytes);
1579 		CP(msqbuf, msqold, msg_lspid);
1580 		CP(msqbuf, msqold, msg_lrpid);
1581 		CP(msqbuf, msqold, msg_stime);
1582 		CP(msqbuf, msqold, msg_rtime);
1583 		CP(msqbuf, msqold, msg_ctime);
1584 		error = copyout(&msqold, uap->buf, sizeof(struct msqid_ds_old));
1585 	}
1586 	return (error);
1587 }
1588 
1589 #undef CP
1590 
1591 #endif	/* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 ||
1592 	   COMPAT_FREEBSD7 */
1593