xref: /freebsd/sys/kern/sysv_msg.c (revision dce6e6518b85561495cff38a3074a69d29d58a55)
1 /*
2  * Implementation of SVID messages
3  *
4  * Author:  Daniel Boulet
5  *
6  * Copyright 1993 Daniel Boulet and RTMX Inc.
7  *
8  * This system call was implemented by Daniel Boulet under contract from RTMX.
9  *
10  * Redistribution and use in source forms, with and without modification,
11  * are permitted provided that this entire comment appears intact.
12  *
13  * Redistribution in binary form may occur without any restrictions.
14  * Obviously, it would be nice if you gave credit where credit is due
15  * but requiring it would be too onerous.
16  *
17  * This software is provided ``AS IS'' without any warranties of any kind.
18  */
19 
20 #include <sys/cdefs.h>
21 __FBSDID("$FreeBSD$");
22 
23 #include "opt_sysvipc.h"
24 
25 #include <sys/param.h>
26 #include <sys/systm.h>
27 #include <sys/sysproto.h>
28 #include <sys/kernel.h>
29 #include <sys/proc.h>
30 #include <sys/lock.h>
31 #include <sys/mutex.h>
32 #include <sys/msg.h>
33 #include <sys/syscall.h>
34 #include <sys/sysent.h>
35 #include <sys/sysctl.h>
36 #include <sys/malloc.h>
37 #include <sys/jail.h>
38 
39 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
40 
41 static void msginit(void);
42 static int msgunload(void);
43 static int sysvmsg_modload(struct module *, int, void *);
44 
45 #ifdef MSG_DEBUG
46 #define DPRINTF(a)	printf a
47 #else
48 #define DPRINTF(a)
49 #endif
50 
51 static void msg_freehdr(struct msg *msghdr);
52 
53 /* XXX casting to (sy_call_t *) is bogus, as usual. */
54 static sy_call_t *msgcalls[] = {
55 	(sy_call_t *)msgctl, (sy_call_t *)msgget,
56 	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
57 };
58 
59 struct msg {
60 	struct	msg *msg_next;	/* next msg in the chain */
61 	long	msg_type;	/* type of this message */
62     				/* >0 -> type of this message */
63     				/* 0 -> free header */
64 	u_short	msg_ts;		/* size of this message */
65 	short	msg_spot;	/* location of start of msg in buffer */
66 };
67 
68 
/*
 * Compile-time defaults.  Each one except MSGMAX may be overridden by
 * defining the macro before this point.
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif

#ifndef MSGSEG
#define MSGSEG	2048		/* must be less than 32767 */
#endif

/* Largest message: every segment in the pool. */
#define MSGMAX	((MSGSSZ) * (MSGSEG))

#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif

#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif

#ifndef MSGTQL
#define MSGTQL	40		/* max messages in system */
#endif
85 
86 /*
87  * Based on the configuration parameters described in an SVR2 (yes, two)
88  * config(1m) man page.
89  *
90  * Each message is broken up and stored in segments that are msgssz bytes
91  * long.  For efficiency reasons, this should be a power of two.  Also,
92  * it doesn't make sense if it is less than 8 or greater than about 256.
93  * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
94  * two between 8 and 1024 inclusive (and panic's if it isn't).
95  */
96 struct msginfo msginfo = {
97                 MSGMAX,         /* max chars in a message */
98                 MSGMNI,         /* # of message queue identifiers */
99                 MSGMNB,         /* max chars in a queue */
100                 MSGTQL,         /* max messages in system */
101                 MSGSSZ,         /* size of a message segment */
102                 		/* (must be small power of 2 greater than 4) */
103                 MSGSEG          /* number of message segments */
104 };
105 
/*
 * Macros to convert between msqid_ds's and msqid's
 * (specific to this implementation).
 *
 * A msqid carries the slot index in its low 16 bits and the slot's
 * sequence number in the next 16 bits.
 */
#define MSQID(ix,ds)	\
	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
113 
114 /*
115  * The rest of this file is specific to this particular implementation.
116  */
117 
118 struct msgmap {
119 	short	next;		/* next segment in buffer */
120     				/* -1 -> available */
121     				/* 0..(MSGSEG-1) -> index of next segment */
122 };
123 
124 #define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
125 
126 static int nfree_msgmaps;	/* # of free map entries */
127 static short free_msgmaps;	/* head of linked list of free map entries */
128 static struct msg *free_msghdrs;/* list of free msg headers */
129 static char *msgpool;		/* MSGMAX byte long msg buffer pool */
130 static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
131 static struct msg *msghdrs;	/* MSGTQL msg headers */
132 static struct msqid_ds *msqids;	/* MSGMNI msqid_ds struct's */
133 static struct mtx msq_mtx;	/* global mutex for message queues. */
134 
135 static void
136 msginit()
137 {
138 	register int i;
139 
140 	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
141 	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
142 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
143 	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
144 
145 	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
146 	if (msgpool == NULL)
147 		panic("msgpool is NULL");
148 	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
149 	if (msgmaps == NULL)
150 		panic("msgmaps is NULL");
151 	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
152 	if (msghdrs == NULL)
153 		panic("msghdrs is NULL");
154 	msqids = malloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
155 	if (msqids == NULL)
156 		panic("msqids is NULL");
157 
158 	/*
159 	 * msginfo.msgssz should be a power of two for efficiency reasons.
160 	 * It is also pretty silly if msginfo.msgssz is less than 8
161 	 * or greater than about 256 so ...
162 	 */
163 
164 	i = 8;
165 	while (i < 1024 && i != msginfo.msgssz)
166 		i <<= 1;
167     	if (i != msginfo.msgssz) {
168 		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
169 		    msginfo.msgssz));
170 		panic("msginfo.msgssz not a small power of 2");
171 	}
172 
173 	if (msginfo.msgseg > 32767) {
174 		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
175 		panic("msginfo.msgseg > 32767");
176 	}
177 
178 	if (msgmaps == NULL)
179 		panic("msgmaps is NULL");
180 
181 	for (i = 0; i < msginfo.msgseg; i++) {
182 		if (i > 0)
183 			msgmaps[i-1].next = i;
184 		msgmaps[i].next = -1;	/* implies entry is available */
185 	}
186 	free_msgmaps = 0;
187 	nfree_msgmaps = msginfo.msgseg;
188 
189 	if (msghdrs == NULL)
190 		panic("msghdrs is NULL");
191 
192 	for (i = 0; i < msginfo.msgtql; i++) {
193 		msghdrs[i].msg_type = 0;
194 		if (i > 0)
195 			msghdrs[i-1].msg_next = &msghdrs[i];
196 		msghdrs[i].msg_next = NULL;
197     	}
198 	free_msghdrs = &msghdrs[0];
199 
200 	if (msqids == NULL)
201 		panic("msqids is NULL");
202 
203 	for (i = 0; i < msginfo.msgmni; i++) {
204 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
205 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
206 		msqids[i].msg_perm.mode = 0;
207 	}
208 	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
209 }
210 
211 static int
212 msgunload()
213 {
214 	struct msqid_ds *msqptr;
215 	int msqid;
216 
217 	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
218 		/*
219 		 * Look for an unallocated and unlocked msqid_ds.
220 		 * msqid_ds's can be locked by msgsnd or msgrcv while
221 		 * they are copying the message in/out.  We can't
222 		 * re-use the entry until they release it.
223 		 */
224 		msqptr = &msqids[msqid];
225 		if (msqptr->msg_qbytes != 0 ||
226 		    (msqptr->msg_perm.mode & MSG_LOCKED) != 0)
227 			break;
228 	}
229 	if (msqid != msginfo.msgmni)
230 		return (EBUSY);
231 
232 	free(msgpool, M_MSG);
233 	free(msgmaps, M_MSG);
234 	free(msghdrs, M_MSG);
235 	free(msqids, M_MSG);
236 	mtx_destroy(&msq_mtx);
237 	return (0);
238 }
239 
240 
241 static int
242 sysvmsg_modload(struct module *module, int cmd, void *arg)
243 {
244 	int error = 0;
245 
246 	switch (cmd) {
247 	case MOD_LOAD:
248 		msginit();
249 		break;
250 	case MOD_UNLOAD:
251 		error = msgunload();
252 		break;
253 	case MOD_SHUTDOWN:
254 		break;
255 	default:
256 		error = EINVAL;
257 		break;
258 	}
259 	return (error);
260 }
261 
262 static moduledata_t sysvmsg_mod = {
263 	"sysvmsg",
264 	&sysvmsg_modload,
265 	NULL
266 };
267 
268 SYSCALL_MODULE_HELPER(msgsys);
269 SYSCALL_MODULE_HELPER(msgctl);
270 SYSCALL_MODULE_HELPER(msgget);
271 SYSCALL_MODULE_HELPER(msgsnd);
272 SYSCALL_MODULE_HELPER(msgrcv);
273 
274 DECLARE_MODULE(sysvmsg, sysvmsg_mod,
275 	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
276 MODULE_VERSION(sysvmsg, 1);
277 
278 /*
279  * Entry point for all MSG calls
280  *
281  * MPSAFE
282  */
283 int
284 msgsys(td, uap)
285 	struct thread *td;
286 	/* XXX actually varargs. */
287 	struct msgsys_args /* {
288 		u_int	which;
289 		int	a2;
290 		int	a3;
291 		int	a4;
292 		int	a5;
293 		int	a6;
294 	} */ *uap;
295 {
296 	int error;
297 
298 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
299 		return (ENOSYS);
300 	if (uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
301 		return (EINVAL);
302 	error = (*msgcalls[uap->which])(td, &uap->a2);
303 	return (error);
304 }
305 
306 static void
307 msg_freehdr(msghdr)
308 	struct msg *msghdr;
309 {
310 	while (msghdr->msg_ts > 0) {
311 		short next;
312 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
313 			panic("msghdr->msg_spot out of range");
314 		next = msgmaps[msghdr->msg_spot].next;
315 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
316 		free_msgmaps = msghdr->msg_spot;
317 		nfree_msgmaps++;
318 		msghdr->msg_spot = next;
319 		if (msghdr->msg_ts >= msginfo.msgssz)
320 			msghdr->msg_ts -= msginfo.msgssz;
321 		else
322 			msghdr->msg_ts = 0;
323 	}
324 	if (msghdr->msg_spot != -1)
325 		panic("msghdr->msg_spot != -1");
326 	msghdr->msg_next = free_msghdrs;
327 	free_msghdrs = msghdr;
328 }
329 
330 #ifndef _SYS_SYSPROTO_H_
331 struct msgctl_args {
332 	int	msqid;
333 	int	cmd;
334 	struct	msqid_ds *buf;
335 };
336 #endif
337 
338 /*
339  * MPSAFE
340  */
341 int
342 msgctl(td, uap)
343 	struct thread *td;
344 	register struct msgctl_args *uap;
345 {
346 	int msqid = uap->msqid;
347 	int cmd = uap->cmd;
348 	struct msqid_ds *user_msqptr = uap->buf;
349 	int rval, error;
350 	struct msqid_ds msqbuf;
351 	register struct msqid_ds *msqptr;
352 
353 	DPRINTF(("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr));
354 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
355 		return (ENOSYS);
356 
357 	msqid = IPCID_TO_IX(msqid);
358 
359 	if (msqid < 0 || msqid >= msginfo.msgmni) {
360 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
361 		    msginfo.msgmni));
362 		return (EINVAL);
363 	}
364 	if (cmd == IPC_SET &&
365 	    (error = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
366 		return (error);
367 
368 	msqptr = &msqids[msqid];
369 
370 	mtx_lock(&msq_mtx);
371 	if (msqptr->msg_qbytes == 0) {
372 		DPRINTF(("no such msqid\n"));
373 		error = EINVAL;
374 		goto done2;
375 	}
376 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
377 		DPRINTF(("wrong sequence number\n"));
378 		error = EINVAL;
379 		goto done2;
380 	}
381 
382 	error = 0;
383 	rval = 0;
384 
385 	switch (cmd) {
386 
387 	case IPC_RMID:
388 	{
389 		struct msg *msghdr;
390 		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
391 			goto done2;
392 		/* Free the message headers */
393 		msghdr = msqptr->msg_first;
394 		while (msghdr != NULL) {
395 			struct msg *msghdr_tmp;
396 
397 			/* Free the segments of each message */
398 			msqptr->msg_cbytes -= msghdr->msg_ts;
399 			msqptr->msg_qnum--;
400 			msghdr_tmp = msghdr;
401 			msghdr = msghdr->msg_next;
402 			msg_freehdr(msghdr_tmp);
403 		}
404 
405 		if (msqptr->msg_cbytes != 0)
406 			panic("msg_cbytes is screwed up");
407 		if (msqptr->msg_qnum != 0)
408 			panic("msg_qnum is screwed up");
409 
410 		msqptr->msg_qbytes = 0;	/* Mark it as free */
411 
412 		wakeup(msqptr);
413 	}
414 
415 		break;
416 
417 	case IPC_SET:
418 		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
419 			goto done2;
420 		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
421 			error = suser(td);
422 			if (error)
423 				goto done2;
424 		}
425 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
426 			DPRINTF(("can't increase msg_qbytes beyond %d"
427 			    "(truncating)\n", msginfo.msgmnb));
428 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
429 		}
430 		if (msqbuf.msg_qbytes == 0) {
431 			DPRINTF(("can't reduce msg_qbytes to 0\n"));
432 			error = EINVAL;		/* non-standard errno! */
433 			goto done2;
434 		}
435 		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
436 		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
437 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
438 		    (msqbuf.msg_perm.mode & 0777);
439 		msqptr->msg_qbytes = msqbuf.msg_qbytes;
440 		msqptr->msg_ctime = time_second;
441 		break;
442 
443 	case IPC_STAT:
444 		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
445 			DPRINTF(("requester doesn't have read access\n"));
446 			goto done2;
447 		}
448 		break;
449 
450 	default:
451 		DPRINTF(("invalid command %d\n", cmd));
452 		error = EINVAL;
453 		goto done2;
454 	}
455 
456 	if (error == 0)
457 		td->td_retval[0] = rval;
458 done2:
459 	mtx_unlock(&msq_mtx);
460 	if (cmd == IPC_STAT && error == 0)
461 		error = copyout(msqptr, user_msqptr, sizeof(struct msqid_ds));
462 	return(error);
463 }
464 
465 #ifndef _SYS_SYSPROTO_H_
466 struct msgget_args {
467 	key_t	key;
468 	int	msgflg;
469 };
470 #endif
471 
472 /*
473  * MPSAFE
474  */
475 int
476 msgget(td, uap)
477 	struct thread *td;
478 	register struct msgget_args *uap;
479 {
480 	int msqid, error = 0;
481 	int key = uap->key;
482 	int msgflg = uap->msgflg;
483 	struct ucred *cred = td->td_ucred;
484 	register struct msqid_ds *msqptr = NULL;
485 
486 	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
487 
488 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
489 		return (ENOSYS);
490 
491 	mtx_lock(&msq_mtx);
492 	if (key != IPC_PRIVATE) {
493 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
494 			msqptr = &msqids[msqid];
495 			if (msqptr->msg_qbytes != 0 &&
496 			    msqptr->msg_perm.key == key)
497 				break;
498 		}
499 		if (msqid < msginfo.msgmni) {
500 			DPRINTF(("found public key\n"));
501 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
502 				DPRINTF(("not exclusive\n"));
503 				error = EEXIST;
504 				goto done2;
505 			}
506 			if ((error = ipcperm(td, &msqptr->msg_perm, msgflg & 0700 ))) {
507 				DPRINTF(("requester doesn't have 0%o access\n",
508 				    msgflg & 0700));
509 				goto done2;
510 			}
511 			goto found;
512 		}
513 	}
514 
515 	DPRINTF(("need to allocate the msqid_ds\n"));
516 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
517 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
518 			/*
519 			 * Look for an unallocated and unlocked msqid_ds.
520 			 * msqid_ds's can be locked by msgsnd or msgrcv while
521 			 * they are copying the message in/out.  We can't
522 			 * re-use the entry until they release it.
523 			 */
524 			msqptr = &msqids[msqid];
525 			if (msqptr->msg_qbytes == 0 &&
526 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
527 				break;
528 		}
529 		if (msqid == msginfo.msgmni) {
530 			DPRINTF(("no more msqid_ds's available\n"));
531 			error = ENOSPC;
532 			goto done2;
533 		}
534 		DPRINTF(("msqid %d is available\n", msqid));
535 		msqptr->msg_perm.key = key;
536 		msqptr->msg_perm.cuid = cred->cr_uid;
537 		msqptr->msg_perm.uid = cred->cr_uid;
538 		msqptr->msg_perm.cgid = cred->cr_gid;
539 		msqptr->msg_perm.gid = cred->cr_gid;
540 		msqptr->msg_perm.mode = (msgflg & 0777);
541 		/* Make sure that the returned msqid is unique */
542 		msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff;
543 		msqptr->msg_first = NULL;
544 		msqptr->msg_last = NULL;
545 		msqptr->msg_cbytes = 0;
546 		msqptr->msg_qnum = 0;
547 		msqptr->msg_qbytes = msginfo.msgmnb;
548 		msqptr->msg_lspid = 0;
549 		msqptr->msg_lrpid = 0;
550 		msqptr->msg_stime = 0;
551 		msqptr->msg_rtime = 0;
552 		msqptr->msg_ctime = time_second;
553 	} else {
554 		DPRINTF(("didn't find it and wasn't asked to create it\n"));
555 		error = ENOENT;
556 		goto done2;
557 	}
558 
559 found:
560 	/* Construct the unique msqid */
561 	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
562 done2:
563 	mtx_unlock(&msq_mtx);
564 	return (error);
565 }
566 
567 #ifndef _SYS_SYSPROTO_H_
568 struct msgsnd_args {
569 	int	msqid;
570 	const void	*msgp;
571 	size_t	msgsz;
572 	int	msgflg;
573 };
574 #endif
575 
576 /*
577  * MPSAFE
578  */
579 int
580 msgsnd(td, uap)
581 	struct thread *td;
582 	register struct msgsnd_args *uap;
583 {
584 	int msqid = uap->msqid;
585 	const void *user_msgp = uap->msgp;
586 	size_t msgsz = uap->msgsz;
587 	int msgflg = uap->msgflg;
588 	int segs_needed, error = 0;
589 	register struct msqid_ds *msqptr;
590 	register struct msg *msghdr;
591 	short next;
592 
593 	DPRINTF(("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
594 	    msgflg));
595 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
596 		return (ENOSYS);
597 
598 	mtx_lock(&msq_mtx);
599 	msqid = IPCID_TO_IX(msqid);
600 
601 	if (msqid < 0 || msqid >= msginfo.msgmni) {
602 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
603 		    msginfo.msgmni));
604 		error = EINVAL;
605 		goto done2;
606 	}
607 
608 	msqptr = &msqids[msqid];
609 	if (msqptr->msg_qbytes == 0) {
610 		DPRINTF(("no such message queue id\n"));
611 		error = EINVAL;
612 		goto done2;
613 	}
614 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
615 		DPRINTF(("wrong sequence number\n"));
616 		error = EINVAL;
617 		goto done2;
618 	}
619 
620 	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_W))) {
621 		DPRINTF(("requester doesn't have write access\n"));
622 		goto done2;
623 	}
624 
625 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
626 	DPRINTF(("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
627 	    segs_needed));
628 	for (;;) {
629 		int need_more_resources = 0;
630 
631 		/*
632 		 * check msgsz
633 		 * (inside this loop in case msg_qbytes changes while we sleep)
634 		 */
635 
636 		if (msgsz > msqptr->msg_qbytes) {
637 			DPRINTF(("msgsz > msqptr->msg_qbytes\n"));
638 			error = EINVAL;
639 			goto done2;
640 		}
641 
642 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
643 			DPRINTF(("msqid is locked\n"));
644 			need_more_resources = 1;
645 		}
646 		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
647 			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
648 			need_more_resources = 1;
649 		}
650 		if (segs_needed > nfree_msgmaps) {
651 			DPRINTF(("segs_needed > nfree_msgmaps\n"));
652 			need_more_resources = 1;
653 		}
654 		if (free_msghdrs == NULL) {
655 			DPRINTF(("no more msghdrs\n"));
656 			need_more_resources = 1;
657 		}
658 
659 		if (need_more_resources) {
660 			int we_own_it;
661 
662 			if ((msgflg & IPC_NOWAIT) != 0) {
663 				DPRINTF(("need more resources but caller "
664 				    "doesn't want to wait\n"));
665 				error = EAGAIN;
666 				goto done2;
667 			}
668 
669 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
670 				DPRINTF(("we don't own the msqid_ds\n"));
671 				we_own_it = 0;
672 			} else {
673 				/* Force later arrivals to wait for our
674 				   request */
675 				DPRINTF(("we own the msqid_ds\n"));
676 				msqptr->msg_perm.mode |= MSG_LOCKED;
677 				we_own_it = 1;
678 			}
679 			DPRINTF(("goodnight\n"));
680 			error = msleep(msqptr, &msq_mtx, (PZERO - 4) | PCATCH,
681 			    "msgwait", 0);
682 			DPRINTF(("good morning, error=%d\n", error));
683 			if (we_own_it)
684 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
685 			if (error != 0) {
686 				DPRINTF(("msgsnd:  interrupted system call\n"));
687 				error = EINTR;
688 				goto done2;
689 			}
690 
691 			/*
692 			 * Make sure that the msq queue still exists
693 			 */
694 
695 			if (msqptr->msg_qbytes == 0) {
696 				DPRINTF(("msqid deleted\n"));
697 				error = EIDRM;
698 				goto done2;
699 			}
700 
701 		} else {
702 			DPRINTF(("got all the resources that we need\n"));
703 			break;
704 		}
705 	}
706 
707 	/*
708 	 * We have the resources that we need.
709 	 * Make sure!
710 	 */
711 
712 	if (msqptr->msg_perm.mode & MSG_LOCKED)
713 		panic("msg_perm.mode & MSG_LOCKED");
714 	if (segs_needed > nfree_msgmaps)
715 		panic("segs_needed > nfree_msgmaps");
716 	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
717 		panic("msgsz + msg_cbytes > msg_qbytes");
718 	if (free_msghdrs == NULL)
719 		panic("no more msghdrs");
720 
721 	/*
722 	 * Re-lock the msqid_ds in case we page-fault when copying in the
723 	 * message
724 	 */
725 
726 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
727 		panic("msqid_ds is already locked");
728 	msqptr->msg_perm.mode |= MSG_LOCKED;
729 
730 	/*
731 	 * Allocate a message header
732 	 */
733 
734 	msghdr = free_msghdrs;
735 	free_msghdrs = msghdr->msg_next;
736 	msghdr->msg_spot = -1;
737 	msghdr->msg_ts = msgsz;
738 
739 	/*
740 	 * Allocate space for the message
741 	 */
742 
743 	while (segs_needed > 0) {
744 		if (nfree_msgmaps <= 0)
745 			panic("not enough msgmaps");
746 		if (free_msgmaps == -1)
747 			panic("nil free_msgmaps");
748 		next = free_msgmaps;
749 		if (next <= -1)
750 			panic("next too low #1");
751 		if (next >= msginfo.msgseg)
752 			panic("next out of range #1");
753 		DPRINTF(("allocating segment %d to message\n", next));
754 		free_msgmaps = msgmaps[next].next;
755 		nfree_msgmaps--;
756 		msgmaps[next].next = msghdr->msg_spot;
757 		msghdr->msg_spot = next;
758 		segs_needed--;
759 	}
760 
761 	/*
762 	 * Copy in the message type
763 	 */
764 
765 	mtx_unlock(&msq_mtx);
766 	if ((error = copyin(user_msgp, &msghdr->msg_type,
767 	    sizeof(msghdr->msg_type))) != 0) {
768 		mtx_lock(&msq_mtx);
769 		DPRINTF(("error %d copying the message type\n", error));
770 		msg_freehdr(msghdr);
771 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
772 		wakeup(msqptr);
773 		goto done2;
774 	}
775 	mtx_lock(&msq_mtx);
776 	user_msgp = (const char *)user_msgp + sizeof(msghdr->msg_type);
777 
778 	/*
779 	 * Validate the message type
780 	 */
781 
782 	if (msghdr->msg_type < 1) {
783 		msg_freehdr(msghdr);
784 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
785 		wakeup(msqptr);
786 		DPRINTF(("mtype (%d) < 1\n", msghdr->msg_type));
787 		error = EINVAL;
788 		goto done2;
789 	}
790 
791 	/*
792 	 * Copy in the message body
793 	 */
794 
795 	next = msghdr->msg_spot;
796 	while (msgsz > 0) {
797 		size_t tlen;
798 		if (msgsz > msginfo.msgssz)
799 			tlen = msginfo.msgssz;
800 		else
801 			tlen = msgsz;
802 		if (next <= -1)
803 			panic("next too low #2");
804 		if (next >= msginfo.msgseg)
805 			panic("next out of range #2");
806 		mtx_unlock(&msq_mtx);
807 		if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
808 		    tlen)) != 0) {
809 			mtx_lock(&msq_mtx);
810 			DPRINTF(("error %d copying in message segment\n",
811 			    error));
812 			msg_freehdr(msghdr);
813 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
814 			wakeup(msqptr);
815 			goto done2;
816 		}
817 		mtx_lock(&msq_mtx);
818 		msgsz -= tlen;
819 		user_msgp = (const char *)user_msgp + tlen;
820 		next = msgmaps[next].next;
821 	}
822 	if (next != -1)
823 		panic("didn't use all the msg segments");
824 
825 	/*
826 	 * We've got the message.  Unlock the msqid_ds.
827 	 */
828 
829 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
830 
831 	/*
832 	 * Make sure that the msqid_ds is still allocated.
833 	 */
834 
835 	if (msqptr->msg_qbytes == 0) {
836 		msg_freehdr(msghdr);
837 		wakeup(msqptr);
838 		error = EIDRM;
839 		goto done2;
840 	}
841 
842 	/*
843 	 * Put the message into the queue
844 	 */
845 
846 	if (msqptr->msg_first == NULL) {
847 		msqptr->msg_first = msghdr;
848 		msqptr->msg_last = msghdr;
849 	} else {
850 		msqptr->msg_last->msg_next = msghdr;
851 		msqptr->msg_last = msghdr;
852 	}
853 	msqptr->msg_last->msg_next = NULL;
854 
855 	msqptr->msg_cbytes += msghdr->msg_ts;
856 	msqptr->msg_qnum++;
857 	msqptr->msg_lspid = td->td_proc->p_pid;
858 	msqptr->msg_stime = time_second;
859 
860 	wakeup(msqptr);
861 	td->td_retval[0] = 0;
862 done2:
863 	mtx_unlock(&msq_mtx);
864 	return (error);
865 }
866 
867 #ifndef _SYS_SYSPROTO_H_
868 struct msgrcv_args {
869 	int	msqid;
870 	void	*msgp;
871 	size_t	msgsz;
872 	long	msgtyp;
873 	int	msgflg;
874 };
875 #endif
876 
877 /*
878  * MPSAFE
879  */
880 int
881 msgrcv(td, uap)
882 	struct thread *td;
883 	register struct msgrcv_args *uap;
884 {
885 	int msqid = uap->msqid;
886 	void *user_msgp = uap->msgp;
887 	size_t msgsz = uap->msgsz;
888 	long msgtyp = uap->msgtyp;
889 	int msgflg = uap->msgflg;
890 	size_t len;
891 	register struct msqid_ds *msqptr;
892 	register struct msg *msghdr;
893 	int error = 0;
894 	short next;
895 
896 	DPRINTF(("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
897 	    msgsz, msgtyp, msgflg));
898 
899 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
900 		return (ENOSYS);
901 
902 	msqid = IPCID_TO_IX(msqid);
903 
904 	if (msqid < 0 || msqid >= msginfo.msgmni) {
905 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
906 		    msginfo.msgmni));
907 		return (EINVAL);
908 	}
909 
910 	msqptr = &msqids[msqid];
911 	mtx_lock(&msq_mtx);
912 	if (msqptr->msg_qbytes == 0) {
913 		DPRINTF(("no such message queue id\n"));
914 		error = EINVAL;
915 		goto done2;
916 	}
917 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
918 		DPRINTF(("wrong sequence number\n"));
919 		error = EINVAL;
920 		goto done2;
921 	}
922 
923 	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
924 		DPRINTF(("requester doesn't have read access\n"));
925 		goto done2;
926 	}
927 
928 	msghdr = NULL;
929 	while (msghdr == NULL) {
930 		if (msgtyp == 0) {
931 			msghdr = msqptr->msg_first;
932 			if (msghdr != NULL) {
933 				if (msgsz < msghdr->msg_ts &&
934 				    (msgflg & MSG_NOERROR) == 0) {
935 					DPRINTF(("first message on the queue "
936 					    "is too big (want %d, got %d)\n",
937 					    msgsz, msghdr->msg_ts));
938 					error = E2BIG;
939 					goto done2;
940 				}
941 				if (msqptr->msg_first == msqptr->msg_last) {
942 					msqptr->msg_first = NULL;
943 					msqptr->msg_last = NULL;
944 				} else {
945 					msqptr->msg_first = msghdr->msg_next;
946 					if (msqptr->msg_first == NULL)
947 						panic("msg_first/last screwed up #1");
948 				}
949 			}
950 		} else {
951 			struct msg *previous;
952 			struct msg **prev;
953 
954 			previous = NULL;
955 			prev = &(msqptr->msg_first);
956 			while ((msghdr = *prev) != NULL) {
957 				/*
958 				 * Is this message's type an exact match or is
959 				 * this message's type less than or equal to
960 				 * the absolute value of a negative msgtyp?
961 				 * Note that the second half of this test can
962 				 * NEVER be true if msgtyp is positive since
963 				 * msg_type is always positive!
964 				 */
965 
966 				if (msgtyp == msghdr->msg_type ||
967 				    msghdr->msg_type <= -msgtyp) {
968 					DPRINTF(("found message type %d, "
969 					    "requested %d\n",
970 					    msghdr->msg_type, msgtyp));
971 					if (msgsz < msghdr->msg_ts &&
972 					    (msgflg & MSG_NOERROR) == 0) {
973 						DPRINTF(("requested message "
974 						    "on the queue is too big "
975 						    "(want %d, got %d)\n",
976 						    msgsz, msghdr->msg_ts));
977 						error = E2BIG;
978 						goto done2;
979 					}
980 					*prev = msghdr->msg_next;
981 					if (msghdr == msqptr->msg_last) {
982 						if (previous == NULL) {
983 							if (prev !=
984 							    &msqptr->msg_first)
985 								panic("msg_first/last screwed up #2");
986 							msqptr->msg_first =
987 							    NULL;
988 							msqptr->msg_last =
989 							    NULL;
990 						} else {
991 							if (prev ==
992 							    &msqptr->msg_first)
993 								panic("msg_first/last screwed up #3");
994 							msqptr->msg_last =
995 							    previous;
996 						}
997 					}
998 					break;
999 				}
1000 				previous = msghdr;
1001 				prev = &(msghdr->msg_next);
1002 			}
1003 		}
1004 
1005 		/*
1006 		 * We've either extracted the msghdr for the appropriate
1007 		 * message or there isn't one.
1008 		 * If there is one then bail out of this loop.
1009 		 */
1010 
1011 		if (msghdr != NULL)
1012 			break;
1013 
1014 		/*
1015 		 * Hmph!  No message found.  Does the user want to wait?
1016 		 */
1017 
1018 		if ((msgflg & IPC_NOWAIT) != 0) {
1019 			DPRINTF(("no appropriate message found (msgtyp=%d)\n",
1020 			    msgtyp));
1021 			/* The SVID says to return ENOMSG. */
1022 			error = ENOMSG;
1023 			goto done2;
1024 		}
1025 
1026 		/*
1027 		 * Wait for something to happen
1028 		 */
1029 
1030 		DPRINTF(("msgrcv:  goodnight\n"));
1031 		error = msleep(msqptr, &msq_mtx, (PZERO - 4) | PCATCH,
1032 		    "msgwait", 0);
1033 		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1034 
1035 		if (error != 0) {
1036 			DPRINTF(("msgsnd:  interrupted system call\n"));
1037 			error = EINTR;
1038 			goto done2;
1039 		}
1040 
1041 		/*
1042 		 * Make sure that the msq queue still exists
1043 		 */
1044 
1045 		if (msqptr->msg_qbytes == 0 ||
1046 		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1047 			DPRINTF(("msqid deleted\n"));
1048 			error = EIDRM;
1049 			goto done2;
1050 		}
1051 	}
1052 
1053 	/*
1054 	 * Return the message to the user.
1055 	 *
1056 	 * First, do the bookkeeping (before we risk being interrupted).
1057 	 */
1058 
1059 	msqptr->msg_cbytes -= msghdr->msg_ts;
1060 	msqptr->msg_qnum--;
1061 	msqptr->msg_lrpid = td->td_proc->p_pid;
1062 	msqptr->msg_rtime = time_second;
1063 
1064 	/*
1065 	 * Make msgsz the actual amount that we'll be returning.
1066 	 * Note that this effectively truncates the message if it is too long
1067 	 * (since msgsz is never increased).
1068 	 */
1069 
1070 	DPRINTF(("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1071 	    msghdr->msg_ts));
1072 	if (msgsz > msghdr->msg_ts)
1073 		msgsz = msghdr->msg_ts;
1074 
1075 	/*
1076 	 * Return the type to the user.
1077 	 */
1078 
1079 	mtx_unlock(&msq_mtx);
1080 	error = copyout(&(msghdr->msg_type), user_msgp,
1081 	    sizeof(msghdr->msg_type));
1082 	mtx_lock(&msq_mtx);
1083 	if (error != 0) {
1084 		DPRINTF(("error (%d) copying out message type\n", error));
1085 		msg_freehdr(msghdr);
1086 		wakeup(msqptr);
1087 		goto done2;
1088 	}
1089 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1090 
1091 	/*
1092 	 * Return the segments to the user
1093 	 */
1094 
1095 	next = msghdr->msg_spot;
1096 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1097 		size_t tlen;
1098 
1099 		if (msgsz - len > msginfo.msgssz)
1100 			tlen = msginfo.msgssz;
1101 		else
1102 			tlen = msgsz - len;
1103 		if (next <= -1)
1104 			panic("next too low #3");
1105 		if (next >= msginfo.msgseg)
1106 			panic("next out of range #3");
1107 		mtx_unlock(&msq_mtx);
1108 		error = copyout(&msgpool[next * msginfo.msgssz],
1109 		    user_msgp, tlen);
1110 		mtx_lock(&msq_mtx);
1111 		if (error != 0) {
1112 			DPRINTF(("error (%d) copying out message segment\n",
1113 			    error));
1114 			msg_freehdr(msghdr);
1115 			wakeup(msqptr);
1116 			goto done2;
1117 		}
1118 		user_msgp = (char *)user_msgp + tlen;
1119 		next = msgmaps[next].next;
1120 	}
1121 
1122 	/*
1123 	 * Done, return the actual number of bytes copied out.
1124 	 */
1125 
1126 	msg_freehdr(msghdr);
1127 	wakeup(msqptr);
1128 	td->td_retval[0] = msgsz;
1129 done2:
1130 	mtx_unlock(&msq_mtx);
1131 	return (error);
1132 }
1133 
1134 static int
1135 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1136 {
1137 
1138 	return (SYSCTL_OUT(req, msqids,
1139 	    sizeof(struct msqid_ds) * msginfo.msgmni));
1140 }
1141 
1142 SYSCTL_DECL(_kern_ipc);
1143 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
1144 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0, "");
1145 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
1146 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
1147 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0, "");
1148 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0, "");
1149 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1150     NULL, 0, sysctl_msqids, "", "Message queue IDs");
1151