xref: /freebsd/sys/kern/sysv_msg.c (revision a2f88a8b7c91e72fbde3648a90733feadd3a7ca7)
1 /*
2  * Implementation of SVID messages
3  *
4  * Author:  Daniel Boulet
5  *
6  * Copyright 1993 Daniel Boulet and RTMX Inc.
7  *
8  * This system call was implemented by Daniel Boulet under contract from RTMX.
9  *
10  * Redistribution and use in source forms, with and without modification,
11  * are permitted provided that this entire comment appears intact.
12  *
13  * Redistribution in binary form may occur without any restrictions.
14  * Obviously, it would be nice if you gave credit where credit is due
15  * but requiring it would be too onerous.
16  *
17  * This software is provided ``AS IS'' without any warranties of any kind.
18  */
19 
20 #include <sys/cdefs.h>
21 __FBSDID("$FreeBSD$");
22 
23 #include "opt_sysvipc.h"
24 
25 #include <sys/param.h>
26 #include <sys/systm.h>
27 #include <sys/sysproto.h>
28 #include <sys/kernel.h>
29 #include <sys/proc.h>
30 #include <sys/lock.h>
31 #include <sys/mutex.h>
32 #include <sys/msg.h>
33 #include <sys/syscall.h>
34 #include <sys/sysent.h>
35 #include <sys/sysctl.h>
36 #include <sys/malloc.h>
37 #include <sys/jail.h>
38 
39 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
40 
41 static void msginit(void);
42 static int msgunload(void);
43 static int sysvmsg_modload(struct module *, int, void *);
44 
45 #ifdef MSG_DEBUG
46 #define DPRINTF(a)	printf a
47 #else
48 #define DPRINTF(a)
49 #endif
50 
51 static void msg_freehdr(struct msg *msghdr);
52 
53 /* XXX casting to (sy_call_t *) is bogus, as usual. */
54 static sy_call_t *msgcalls[] = {
55 	(sy_call_t *)msgctl, (sy_call_t *)msgget,
56 	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
57 };
58 
59 struct msg {
60 	struct	msg *msg_next;	/* next msg in the chain */
61 	long	msg_type;	/* type of this message */
62     				/* >0 -> type of this message */
63     				/* 0 -> free header */
64 	u_short	msg_ts;		/* size of this message */
65 	short	msg_spot;	/* location of start of msg in buffer */
66 };
67 
68 
/*
 * Compile-time defaults for the message-queue limits.  Each one may be
 * overridden by defining the macro before this point.
 */
#if !defined(MSGSSZ)
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif

#if !defined(MSGSEG)
#define MSGSEG	2048		/* must be less than 32767 */
#endif

#if !defined(MSGMNB)
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif

#if !defined(MSGMNI)
#define MSGMNI	40		/* # of message queue identifiers */
#endif

#if !defined(MSGTQL)
#define MSGTQL	40		/* max messages in system */
#endif

/* Size of the whole segment pool, and therefore of the largest message. */
#define MSGMAX	(MSGSSZ*MSGSEG)
85 
86 /*
87  * Based on the configuration parameters described in an SVR2 (yes, two)
88  * config(1m) man page.
89  *
90  * Each message is broken up and stored in segments that are msgssz bytes
91  * long.  For efficiency reasons, this should be a power of two.  Also,
92  * it doesn't make sense if it is less than 8 or greater than about 256.
93  * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
94  * two between 8 and 1024 inclusive (and panic's if it isn't).
95  */
96 struct msginfo msginfo = {
97                 MSGMAX,         /* max chars in a message */
98                 MSGMNI,         /* # of message queue identifiers */
99                 MSGMNB,         /* max chars in a queue */
100                 MSGTQL,         /* max messages in system */
101                 MSGSSZ,         /* size of a message segment */
102                 		/* (must be small power of 2 greater than 4) */
103                 MSGSEG          /* number of message segments */
104 };
105 
/*
 * Macros to convert between msqid_ds's and msqid's
 * (specific to this implementation).  A msqid carries the table index in
 * its low 16 bits and the slot's sequence number in the next 16 bits.
 */
#define MSQID(ix,ds)	\
	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
113 
114 /*
115  * The rest of this file is specific to this particular implementation.
116  */
117 
118 struct msgmap {
119 	short	next;		/* next segment in buffer */
120     				/* -1 -> available */
121     				/* 0..(MSGSEG-1) -> index of next segment */
122 };
123 
124 #define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
125 
126 static int nfree_msgmaps;	/* # of free map entries */
127 static short free_msgmaps;	/* head of linked list of free map entries */
128 static struct msg *free_msghdrs;/* list of free msg headers */
129 static char *msgpool;		/* MSGMAX byte long msg buffer pool */
130 static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
131 static struct msg *msghdrs;	/* MSGTQL msg headers */
132 static struct msqid_ds *msqids;	/* MSGMNI msqid_ds struct's */
133 static struct mtx msq_mtx;	/* global mutex for message queues. */
134 
135 static void
136 msginit()
137 {
138 	register int i;
139 
140 	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
141 	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
142 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
143 	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
144 
145 	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
146 	if (msgpool == NULL)
147 		panic("msgpool is NULL");
148 	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
149 	if (msgmaps == NULL)
150 		panic("msgmaps is NULL");
151 	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
152 	if (msghdrs == NULL)
153 		panic("msghdrs is NULL");
154 	msqids = malloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
155 	if (msqids == NULL)
156 		panic("msqids is NULL");
157 
158 	/*
159 	 * msginfo.msgssz should be a power of two for efficiency reasons.
160 	 * It is also pretty silly if msginfo.msgssz is less than 8
161 	 * or greater than about 256 so ...
162 	 */
163 
164 	i = 8;
165 	while (i < 1024 && i != msginfo.msgssz)
166 		i <<= 1;
167     	if (i != msginfo.msgssz) {
168 		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
169 		    msginfo.msgssz));
170 		panic("msginfo.msgssz not a small power of 2");
171 	}
172 
173 	if (msginfo.msgseg > 32767) {
174 		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
175 		panic("msginfo.msgseg > 32767");
176 	}
177 
178 	if (msgmaps == NULL)
179 		panic("msgmaps is NULL");
180 
181 	for (i = 0; i < msginfo.msgseg; i++) {
182 		if (i > 0)
183 			msgmaps[i-1].next = i;
184 		msgmaps[i].next = -1;	/* implies entry is available */
185 	}
186 	free_msgmaps = 0;
187 	nfree_msgmaps = msginfo.msgseg;
188 
189 	if (msghdrs == NULL)
190 		panic("msghdrs is NULL");
191 
192 	for (i = 0; i < msginfo.msgtql; i++) {
193 		msghdrs[i].msg_type = 0;
194 		if (i > 0)
195 			msghdrs[i-1].msg_next = &msghdrs[i];
196 		msghdrs[i].msg_next = NULL;
197     	}
198 	free_msghdrs = &msghdrs[0];
199 
200 	if (msqids == NULL)
201 		panic("msqids is NULL");
202 
203 	for (i = 0; i < msginfo.msgmni; i++) {
204 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
205 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
206 		msqids[i].msg_perm.mode = 0;
207 	}
208 	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
209 }
210 
211 static int
212 msgunload()
213 {
214 	struct msqid_ds *msqptr;
215 	int msqid;
216 
217 	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
218 		/*
219 		 * Look for an unallocated and unlocked msqid_ds.
220 		 * msqid_ds's can be locked by msgsnd or msgrcv while
221 		 * they are copying the message in/out.  We can't
222 		 * re-use the entry until they release it.
223 		 */
224 		msqptr = &msqids[msqid];
225 		if (msqptr->msg_qbytes != 0 ||
226 		    (msqptr->msg_perm.mode & MSG_LOCKED) != 0)
227 			break;
228 	}
229 	if (msqid != msginfo.msgmni)
230 		return (EBUSY);
231 
232 	free(msgpool, M_MSG);
233 	free(msgmaps, M_MSG);
234 	free(msghdrs, M_MSG);
235 	free(msqids, M_MSG);
236 	mtx_destroy(&msq_mtx);
237 	return (0);
238 }
239 
240 
241 static int
242 sysvmsg_modload(struct module *module, int cmd, void *arg)
243 {
244 	int error = 0;
245 
246 	switch (cmd) {
247 	case MOD_LOAD:
248 		msginit();
249 		break;
250 	case MOD_UNLOAD:
251 		error = msgunload();
252 		break;
253 	case MOD_SHUTDOWN:
254 		break;
255 	default:
256 		error = EINVAL;
257 		break;
258 	}
259 	return (error);
260 }
261 
262 static moduledata_t sysvmsg_mod = {
263 	"sysvmsg",
264 	&sysvmsg_modload,
265 	NULL
266 };
267 
268 SYSCALL_MODULE_HELPER(msgsys);
269 SYSCALL_MODULE_HELPER(msgctl);
270 SYSCALL_MODULE_HELPER(msgget);
271 SYSCALL_MODULE_HELPER(msgsnd);
272 SYSCALL_MODULE_HELPER(msgrcv);
273 
274 DECLARE_MODULE(sysvmsg, sysvmsg_mod,
275 	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
276 MODULE_VERSION(sysvmsg, 1);
277 
278 /*
279  * Entry point for all MSG calls
280  *
281  * MPSAFE
282  */
283 int
284 msgsys(td, uap)
285 	struct thread *td;
286 	/* XXX actually varargs. */
287 	struct msgsys_args /* {
288 		int	which;
289 		int	a2;
290 		int	a3;
291 		int	a4;
292 		int	a5;
293 		int	a6;
294 	} */ *uap;
295 {
296 	int error;
297 
298 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
299 		return (ENOSYS);
300 	if (uap->which < 0 ||
301 	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
302 		return (EINVAL);
303 	error = (*msgcalls[uap->which])(td, &uap->a2);
304 	return (error);
305 }
306 
307 static void
308 msg_freehdr(msghdr)
309 	struct msg *msghdr;
310 {
311 	while (msghdr->msg_ts > 0) {
312 		short next;
313 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
314 			panic("msghdr->msg_spot out of range");
315 		next = msgmaps[msghdr->msg_spot].next;
316 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
317 		free_msgmaps = msghdr->msg_spot;
318 		nfree_msgmaps++;
319 		msghdr->msg_spot = next;
320 		if (msghdr->msg_ts >= msginfo.msgssz)
321 			msghdr->msg_ts -= msginfo.msgssz;
322 		else
323 			msghdr->msg_ts = 0;
324 	}
325 	if (msghdr->msg_spot != -1)
326 		panic("msghdr->msg_spot != -1");
327 	msghdr->msg_next = free_msghdrs;
328 	free_msghdrs = msghdr;
329 }
330 
331 #ifndef _SYS_SYSPROTO_H_
332 struct msgctl_args {
333 	int	msqid;
334 	int	cmd;
335 	struct	msqid_ds *buf;
336 };
337 #endif
338 
339 /*
340  * MPSAFE
341  */
342 int
343 msgctl(td, uap)
344 	struct thread *td;
345 	register struct msgctl_args *uap;
346 {
347 	int msqid = uap->msqid;
348 	int cmd = uap->cmd;
349 	struct msqid_ds *user_msqptr = uap->buf;
350 	int rval, error;
351 	struct msqid_ds msqbuf;
352 	register struct msqid_ds *msqptr;
353 
354 	DPRINTF(("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr));
355 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
356 		return (ENOSYS);
357 
358 	msqid = IPCID_TO_IX(msqid);
359 
360 	if (msqid < 0 || msqid >= msginfo.msgmni) {
361 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
362 		    msginfo.msgmni));
363 		return (EINVAL);
364 	}
365 	if (cmd == IPC_SET &&
366 	    (error = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
367 		return (error);
368 
369 	msqptr = &msqids[msqid];
370 
371 	mtx_lock(&msq_mtx);
372 	if (msqptr->msg_qbytes == 0) {
373 		DPRINTF(("no such msqid\n"));
374 		error = EINVAL;
375 		goto done2;
376 	}
377 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
378 		DPRINTF(("wrong sequence number\n"));
379 		error = EINVAL;
380 		goto done2;
381 	}
382 
383 	error = 0;
384 	rval = 0;
385 
386 	switch (cmd) {
387 
388 	case IPC_RMID:
389 	{
390 		struct msg *msghdr;
391 		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
392 			goto done2;
393 		/* Free the message headers */
394 		msghdr = msqptr->msg_first;
395 		while (msghdr != NULL) {
396 			struct msg *msghdr_tmp;
397 
398 			/* Free the segments of each message */
399 			msqptr->msg_cbytes -= msghdr->msg_ts;
400 			msqptr->msg_qnum--;
401 			msghdr_tmp = msghdr;
402 			msghdr = msghdr->msg_next;
403 			msg_freehdr(msghdr_tmp);
404 		}
405 
406 		if (msqptr->msg_cbytes != 0)
407 			panic("msg_cbytes is screwed up");
408 		if (msqptr->msg_qnum != 0)
409 			panic("msg_qnum is screwed up");
410 
411 		msqptr->msg_qbytes = 0;	/* Mark it as free */
412 
413 		wakeup(msqptr);
414 	}
415 
416 		break;
417 
418 	case IPC_SET:
419 		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
420 			goto done2;
421 		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
422 			error = suser(td);
423 			if (error)
424 				goto done2;
425 		}
426 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
427 			DPRINTF(("can't increase msg_qbytes beyond %d"
428 			    "(truncating)\n", msginfo.msgmnb));
429 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
430 		}
431 		if (msqbuf.msg_qbytes == 0) {
432 			DPRINTF(("can't reduce msg_qbytes to 0\n"));
433 			error = EINVAL;		/* non-standard errno! */
434 			goto done2;
435 		}
436 		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
437 		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
438 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
439 		    (msqbuf.msg_perm.mode & 0777);
440 		msqptr->msg_qbytes = msqbuf.msg_qbytes;
441 		msqptr->msg_ctime = time_second;
442 		break;
443 
444 	case IPC_STAT:
445 		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
446 			DPRINTF(("requester doesn't have read access\n"));
447 			goto done2;
448 		}
449 		break;
450 
451 	default:
452 		DPRINTF(("invalid command %d\n", cmd));
453 		error = EINVAL;
454 		goto done2;
455 	}
456 
457 	if (error == 0)
458 		td->td_retval[0] = rval;
459 done2:
460 	mtx_unlock(&msq_mtx);
461 	if (cmd == IPC_STAT && error == 0)
462 		error = copyout(msqptr, user_msqptr, sizeof(struct msqid_ds));
463 	return(error);
464 }
465 
466 #ifndef _SYS_SYSPROTO_H_
467 struct msgget_args {
468 	key_t	key;
469 	int	msgflg;
470 };
471 #endif
472 
473 /*
474  * MPSAFE
475  */
476 int
477 msgget(td, uap)
478 	struct thread *td;
479 	register struct msgget_args *uap;
480 {
481 	int msqid, error = 0;
482 	int key = uap->key;
483 	int msgflg = uap->msgflg;
484 	struct ucred *cred = td->td_ucred;
485 	register struct msqid_ds *msqptr = NULL;
486 
487 	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
488 
489 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
490 		return (ENOSYS);
491 
492 	mtx_lock(&msq_mtx);
493 	if (key != IPC_PRIVATE) {
494 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
495 			msqptr = &msqids[msqid];
496 			if (msqptr->msg_qbytes != 0 &&
497 			    msqptr->msg_perm.key == key)
498 				break;
499 		}
500 		if (msqid < msginfo.msgmni) {
501 			DPRINTF(("found public key\n"));
502 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
503 				DPRINTF(("not exclusive\n"));
504 				error = EEXIST;
505 				goto done2;
506 			}
507 			if ((error = ipcperm(td, &msqptr->msg_perm, msgflg & 0700))) {
508 				DPRINTF(("requester doesn't have 0%o access\n",
509 				    msgflg & 0700));
510 				goto done2;
511 			}
512 			goto found;
513 		}
514 	}
515 
516 	DPRINTF(("need to allocate the msqid_ds\n"));
517 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
518 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
519 			/*
520 			 * Look for an unallocated and unlocked msqid_ds.
521 			 * msqid_ds's can be locked by msgsnd or msgrcv while
522 			 * they are copying the message in/out.  We can't
523 			 * re-use the entry until they release it.
524 			 */
525 			msqptr = &msqids[msqid];
526 			if (msqptr->msg_qbytes == 0 &&
527 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
528 				break;
529 		}
530 		if (msqid == msginfo.msgmni) {
531 			DPRINTF(("no more msqid_ds's available\n"));
532 			error = ENOSPC;
533 			goto done2;
534 		}
535 		DPRINTF(("msqid %d is available\n", msqid));
536 		msqptr->msg_perm.key = key;
537 		msqptr->msg_perm.cuid = cred->cr_uid;
538 		msqptr->msg_perm.uid = cred->cr_uid;
539 		msqptr->msg_perm.cgid = cred->cr_gid;
540 		msqptr->msg_perm.gid = cred->cr_gid;
541 		msqptr->msg_perm.mode = (msgflg & 0777);
542 		/* Make sure that the returned msqid is unique */
543 		msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff;
544 		msqptr->msg_first = NULL;
545 		msqptr->msg_last = NULL;
546 		msqptr->msg_cbytes = 0;
547 		msqptr->msg_qnum = 0;
548 		msqptr->msg_qbytes = msginfo.msgmnb;
549 		msqptr->msg_lspid = 0;
550 		msqptr->msg_lrpid = 0;
551 		msqptr->msg_stime = 0;
552 		msqptr->msg_rtime = 0;
553 		msqptr->msg_ctime = time_second;
554 	} else {
555 		DPRINTF(("didn't find it and wasn't asked to create it\n"));
556 		error = ENOENT;
557 		goto done2;
558 	}
559 
560 found:
561 	/* Construct the unique msqid */
562 	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
563 done2:
564 	mtx_unlock(&msq_mtx);
565 	return (error);
566 }
567 
568 #ifndef _SYS_SYSPROTO_H_
569 struct msgsnd_args {
570 	int	msqid;
571 	const void	*msgp;
572 	size_t	msgsz;
573 	int	msgflg;
574 };
575 #endif
576 
577 /*
578  * MPSAFE
579  */
580 int
581 msgsnd(td, uap)
582 	struct thread *td;
583 	register struct msgsnd_args *uap;
584 {
585 	int msqid = uap->msqid;
586 	const void *user_msgp = uap->msgp;
587 	size_t msgsz = uap->msgsz;
588 	int msgflg = uap->msgflg;
589 	int segs_needed, error = 0;
590 	register struct msqid_ds *msqptr;
591 	register struct msg *msghdr;
592 	short next;
593 
594 	DPRINTF(("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
595 	    msgflg));
596 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
597 		return (ENOSYS);
598 
599 	mtx_lock(&msq_mtx);
600 	msqid = IPCID_TO_IX(msqid);
601 
602 	if (msqid < 0 || msqid >= msginfo.msgmni) {
603 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
604 		    msginfo.msgmni));
605 		error = EINVAL;
606 		goto done2;
607 	}
608 
609 	msqptr = &msqids[msqid];
610 	if (msqptr->msg_qbytes == 0) {
611 		DPRINTF(("no such message queue id\n"));
612 		error = EINVAL;
613 		goto done2;
614 	}
615 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
616 		DPRINTF(("wrong sequence number\n"));
617 		error = EINVAL;
618 		goto done2;
619 	}
620 
621 	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_W))) {
622 		DPRINTF(("requester doesn't have write access\n"));
623 		goto done2;
624 	}
625 
626 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
627 	DPRINTF(("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
628 	    segs_needed));
629 	for (;;) {
630 		int need_more_resources = 0;
631 
632 		/*
633 		 * check msgsz
634 		 * (inside this loop in case msg_qbytes changes while we sleep)
635 		 */
636 
637 		if (msgsz > msqptr->msg_qbytes) {
638 			DPRINTF(("msgsz > msqptr->msg_qbytes\n"));
639 			error = EINVAL;
640 			goto done2;
641 		}
642 
643 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
644 			DPRINTF(("msqid is locked\n"));
645 			need_more_resources = 1;
646 		}
647 		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
648 			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
649 			need_more_resources = 1;
650 		}
651 		if (segs_needed > nfree_msgmaps) {
652 			DPRINTF(("segs_needed > nfree_msgmaps\n"));
653 			need_more_resources = 1;
654 		}
655 		if (free_msghdrs == NULL) {
656 			DPRINTF(("no more msghdrs\n"));
657 			need_more_resources = 1;
658 		}
659 
660 		if (need_more_resources) {
661 			int we_own_it;
662 
663 			if ((msgflg & IPC_NOWAIT) != 0) {
664 				DPRINTF(("need more resources but caller "
665 				    "doesn't want to wait\n"));
666 				error = EAGAIN;
667 				goto done2;
668 			}
669 
670 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
671 				DPRINTF(("we don't own the msqid_ds\n"));
672 				we_own_it = 0;
673 			} else {
674 				/* Force later arrivals to wait for our
675 				   request */
676 				DPRINTF(("we own the msqid_ds\n"));
677 				msqptr->msg_perm.mode |= MSG_LOCKED;
678 				we_own_it = 1;
679 			}
680 			DPRINTF(("goodnight\n"));
681 			error = msleep(msqptr, &msq_mtx, (PZERO - 4) | PCATCH,
682 			    "msgwait", 0);
683 			DPRINTF(("good morning, error=%d\n", error));
684 			if (we_own_it)
685 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
686 			if (error != 0) {
687 				DPRINTF(("msgsnd:  interrupted system call\n"));
688 				error = EINTR;
689 				goto done2;
690 			}
691 
692 			/*
693 			 * Make sure that the msq queue still exists
694 			 */
695 
696 			if (msqptr->msg_qbytes == 0) {
697 				DPRINTF(("msqid deleted\n"));
698 				error = EIDRM;
699 				goto done2;
700 			}
701 
702 		} else {
703 			DPRINTF(("got all the resources that we need\n"));
704 			break;
705 		}
706 	}
707 
708 	/*
709 	 * We have the resources that we need.
710 	 * Make sure!
711 	 */
712 
713 	if (msqptr->msg_perm.mode & MSG_LOCKED)
714 		panic("msg_perm.mode & MSG_LOCKED");
715 	if (segs_needed > nfree_msgmaps)
716 		panic("segs_needed > nfree_msgmaps");
717 	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
718 		panic("msgsz + msg_cbytes > msg_qbytes");
719 	if (free_msghdrs == NULL)
720 		panic("no more msghdrs");
721 
722 	/*
723 	 * Re-lock the msqid_ds in case we page-fault when copying in the
724 	 * message
725 	 */
726 
727 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
728 		panic("msqid_ds is already locked");
729 	msqptr->msg_perm.mode |= MSG_LOCKED;
730 
731 	/*
732 	 * Allocate a message header
733 	 */
734 
735 	msghdr = free_msghdrs;
736 	free_msghdrs = msghdr->msg_next;
737 	msghdr->msg_spot = -1;
738 	msghdr->msg_ts = msgsz;
739 
740 	/*
741 	 * Allocate space for the message
742 	 */
743 
744 	while (segs_needed > 0) {
745 		if (nfree_msgmaps <= 0)
746 			panic("not enough msgmaps");
747 		if (free_msgmaps == -1)
748 			panic("nil free_msgmaps");
749 		next = free_msgmaps;
750 		if (next <= -1)
751 			panic("next too low #1");
752 		if (next >= msginfo.msgseg)
753 			panic("next out of range #1");
754 		DPRINTF(("allocating segment %d to message\n", next));
755 		free_msgmaps = msgmaps[next].next;
756 		nfree_msgmaps--;
757 		msgmaps[next].next = msghdr->msg_spot;
758 		msghdr->msg_spot = next;
759 		segs_needed--;
760 	}
761 
762 	/*
763 	 * Copy in the message type
764 	 */
765 
766 	mtx_unlock(&msq_mtx);
767 	if ((error = copyin(user_msgp, &msghdr->msg_type,
768 	    sizeof(msghdr->msg_type))) != 0) {
769 		mtx_lock(&msq_mtx);
770 		DPRINTF(("error %d copying the message type\n", error));
771 		msg_freehdr(msghdr);
772 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
773 		wakeup(msqptr);
774 		goto done2;
775 	}
776 	mtx_lock(&msq_mtx);
777 	user_msgp = (const char *)user_msgp + sizeof(msghdr->msg_type);
778 
779 	/*
780 	 * Validate the message type
781 	 */
782 
783 	if (msghdr->msg_type < 1) {
784 		msg_freehdr(msghdr);
785 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
786 		wakeup(msqptr);
787 		DPRINTF(("mtype (%d) < 1\n", msghdr->msg_type));
788 		error = EINVAL;
789 		goto done2;
790 	}
791 
792 	/*
793 	 * Copy in the message body
794 	 */
795 
796 	next = msghdr->msg_spot;
797 	while (msgsz > 0) {
798 		size_t tlen;
799 		if (msgsz > msginfo.msgssz)
800 			tlen = msginfo.msgssz;
801 		else
802 			tlen = msgsz;
803 		if (next <= -1)
804 			panic("next too low #2");
805 		if (next >= msginfo.msgseg)
806 			panic("next out of range #2");
807 		mtx_unlock(&msq_mtx);
808 		if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
809 		    tlen)) != 0) {
810 			mtx_lock(&msq_mtx);
811 			DPRINTF(("error %d copying in message segment\n",
812 			    error));
813 			msg_freehdr(msghdr);
814 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
815 			wakeup(msqptr);
816 			goto done2;
817 		}
818 		mtx_lock(&msq_mtx);
819 		msgsz -= tlen;
820 		user_msgp = (const char *)user_msgp + tlen;
821 		next = msgmaps[next].next;
822 	}
823 	if (next != -1)
824 		panic("didn't use all the msg segments");
825 
826 	/*
827 	 * We've got the message.  Unlock the msqid_ds.
828 	 */
829 
830 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
831 
832 	/*
833 	 * Make sure that the msqid_ds is still allocated.
834 	 */
835 
836 	if (msqptr->msg_qbytes == 0) {
837 		msg_freehdr(msghdr);
838 		wakeup(msqptr);
839 		error = EIDRM;
840 		goto done2;
841 	}
842 
843 	/*
844 	 * Put the message into the queue
845 	 */
846 
847 	if (msqptr->msg_first == NULL) {
848 		msqptr->msg_first = msghdr;
849 		msqptr->msg_last = msghdr;
850 	} else {
851 		msqptr->msg_last->msg_next = msghdr;
852 		msqptr->msg_last = msghdr;
853 	}
854 	msqptr->msg_last->msg_next = NULL;
855 
856 	msqptr->msg_cbytes += msghdr->msg_ts;
857 	msqptr->msg_qnum++;
858 	msqptr->msg_lspid = td->td_proc->p_pid;
859 	msqptr->msg_stime = time_second;
860 
861 	wakeup(msqptr);
862 	td->td_retval[0] = 0;
863 done2:
864 	mtx_unlock(&msq_mtx);
865 	return (error);
866 }
867 
868 #ifndef _SYS_SYSPROTO_H_
869 struct msgrcv_args {
870 	int	msqid;
871 	void	*msgp;
872 	size_t	msgsz;
873 	long	msgtyp;
874 	int	msgflg;
875 };
876 #endif
877 
878 /*
879  * MPSAFE
880  */
881 int
882 msgrcv(td, uap)
883 	struct thread *td;
884 	register struct msgrcv_args *uap;
885 {
886 	int msqid = uap->msqid;
887 	void *user_msgp = uap->msgp;
888 	size_t msgsz = uap->msgsz;
889 	long msgtyp = uap->msgtyp;
890 	int msgflg = uap->msgflg;
891 	size_t len;
892 	register struct msqid_ds *msqptr;
893 	register struct msg *msghdr;
894 	int error = 0;
895 	short next;
896 
897 	DPRINTF(("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
898 	    msgsz, msgtyp, msgflg));
899 
900 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
901 		return (ENOSYS);
902 
903 	msqid = IPCID_TO_IX(msqid);
904 
905 	if (msqid < 0 || msqid >= msginfo.msgmni) {
906 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
907 		    msginfo.msgmni));
908 		return (EINVAL);
909 	}
910 
911 	msqptr = &msqids[msqid];
912 	mtx_lock(&msq_mtx);
913 	if (msqptr->msg_qbytes == 0) {
914 		DPRINTF(("no such message queue id\n"));
915 		error = EINVAL;
916 		goto done2;
917 	}
918 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
919 		DPRINTF(("wrong sequence number\n"));
920 		error = EINVAL;
921 		goto done2;
922 	}
923 
924 	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
925 		DPRINTF(("requester doesn't have read access\n"));
926 		goto done2;
927 	}
928 
929 	msghdr = NULL;
930 	while (msghdr == NULL) {
931 		if (msgtyp == 0) {
932 			msghdr = msqptr->msg_first;
933 			if (msghdr != NULL) {
934 				if (msgsz < msghdr->msg_ts &&
935 				    (msgflg & MSG_NOERROR) == 0) {
936 					DPRINTF(("first message on the queue "
937 					    "is too big (want %d, got %d)\n",
938 					    msgsz, msghdr->msg_ts));
939 					error = E2BIG;
940 					goto done2;
941 				}
942 				if (msqptr->msg_first == msqptr->msg_last) {
943 					msqptr->msg_first = NULL;
944 					msqptr->msg_last = NULL;
945 				} else {
946 					msqptr->msg_first = msghdr->msg_next;
947 					if (msqptr->msg_first == NULL)
948 						panic("msg_first/last screwed up #1");
949 				}
950 			}
951 		} else {
952 			struct msg *previous;
953 			struct msg **prev;
954 
955 			previous = NULL;
956 			prev = &(msqptr->msg_first);
957 			while ((msghdr = *prev) != NULL) {
958 				/*
959 				 * Is this message's type an exact match or is
960 				 * this message's type less than or equal to
961 				 * the absolute value of a negative msgtyp?
962 				 * Note that the second half of this test can
963 				 * NEVER be true if msgtyp is positive since
964 				 * msg_type is always positive!
965 				 */
966 
967 				if (msgtyp == msghdr->msg_type ||
968 				    msghdr->msg_type <= -msgtyp) {
969 					DPRINTF(("found message type %d, "
970 					    "requested %d\n",
971 					    msghdr->msg_type, msgtyp));
972 					if (msgsz < msghdr->msg_ts &&
973 					    (msgflg & MSG_NOERROR) == 0) {
974 						DPRINTF(("requested message "
975 						    "on the queue is too big "
976 						    "(want %d, got %d)\n",
977 						    msgsz, msghdr->msg_ts));
978 						error = E2BIG;
979 						goto done2;
980 					}
981 					*prev = msghdr->msg_next;
982 					if (msghdr == msqptr->msg_last) {
983 						if (previous == NULL) {
984 							if (prev !=
985 							    &msqptr->msg_first)
986 								panic("msg_first/last screwed up #2");
987 							msqptr->msg_first =
988 							    NULL;
989 							msqptr->msg_last =
990 							    NULL;
991 						} else {
992 							if (prev ==
993 							    &msqptr->msg_first)
994 								panic("msg_first/last screwed up #3");
995 							msqptr->msg_last =
996 							    previous;
997 						}
998 					}
999 					break;
1000 				}
1001 				previous = msghdr;
1002 				prev = &(msghdr->msg_next);
1003 			}
1004 		}
1005 
1006 		/*
1007 		 * We've either extracted the msghdr for the appropriate
1008 		 * message or there isn't one.
1009 		 * If there is one then bail out of this loop.
1010 		 */
1011 
1012 		if (msghdr != NULL)
1013 			break;
1014 
1015 		/*
1016 		 * Hmph!  No message found.  Does the user want to wait?
1017 		 */
1018 
1019 		if ((msgflg & IPC_NOWAIT) != 0) {
1020 			DPRINTF(("no appropriate message found (msgtyp=%d)\n",
1021 			    msgtyp));
1022 			/* The SVID says to return ENOMSG. */
1023 			error = ENOMSG;
1024 			goto done2;
1025 		}
1026 
1027 		/*
1028 		 * Wait for something to happen
1029 		 */
1030 
1031 		DPRINTF(("msgrcv:  goodnight\n"));
1032 		error = msleep(msqptr, &msq_mtx, (PZERO - 4) | PCATCH,
1033 		    "msgwait", 0);
1034 		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1035 
1036 		if (error != 0) {
1037 			DPRINTF(("msgsnd:  interrupted system call\n"));
1038 			error = EINTR;
1039 			goto done2;
1040 		}
1041 
1042 		/*
1043 		 * Make sure that the msq queue still exists
1044 		 */
1045 
1046 		if (msqptr->msg_qbytes == 0 ||
1047 		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1048 			DPRINTF(("msqid deleted\n"));
1049 			error = EIDRM;
1050 			goto done2;
1051 		}
1052 	}
1053 
1054 	/*
1055 	 * Return the message to the user.
1056 	 *
1057 	 * First, do the bookkeeping (before we risk being interrupted).
1058 	 */
1059 
1060 	msqptr->msg_cbytes -= msghdr->msg_ts;
1061 	msqptr->msg_qnum--;
1062 	msqptr->msg_lrpid = td->td_proc->p_pid;
1063 	msqptr->msg_rtime = time_second;
1064 
1065 	/*
1066 	 * Make msgsz the actual amount that we'll be returning.
1067 	 * Note that this effectively truncates the message if it is too long
1068 	 * (since msgsz is never increased).
1069 	 */
1070 
1071 	DPRINTF(("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1072 	    msghdr->msg_ts));
1073 	if (msgsz > msghdr->msg_ts)
1074 		msgsz = msghdr->msg_ts;
1075 
1076 	/*
1077 	 * Return the type to the user.
1078 	 */
1079 
1080 	mtx_unlock(&msq_mtx);
1081 	error = copyout(&(msghdr->msg_type), user_msgp,
1082 	    sizeof(msghdr->msg_type));
1083 	mtx_lock(&msq_mtx);
1084 	if (error != 0) {
1085 		DPRINTF(("error (%d) copying out message type\n", error));
1086 		msg_freehdr(msghdr);
1087 		wakeup(msqptr);
1088 		goto done2;
1089 	}
1090 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1091 
1092 	/*
1093 	 * Return the segments to the user
1094 	 */
1095 
1096 	next = msghdr->msg_spot;
1097 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1098 		size_t tlen;
1099 
1100 		if (msgsz - len > msginfo.msgssz)
1101 			tlen = msginfo.msgssz;
1102 		else
1103 			tlen = msgsz - len;
1104 		if (next <= -1)
1105 			panic("next too low #3");
1106 		if (next >= msginfo.msgseg)
1107 			panic("next out of range #3");
1108 		mtx_unlock(&msq_mtx);
1109 		error = copyout(&msgpool[next * msginfo.msgssz],
1110 		    user_msgp, tlen);
1111 		mtx_lock(&msq_mtx);
1112 		if (error != 0) {
1113 			DPRINTF(("error (%d) copying out message segment\n",
1114 			    error));
1115 			msg_freehdr(msghdr);
1116 			wakeup(msqptr);
1117 			goto done2;
1118 		}
1119 		user_msgp = (char *)user_msgp + tlen;
1120 		next = msgmaps[next].next;
1121 	}
1122 
1123 	/*
1124 	 * Done, return the actual number of bytes copied out.
1125 	 */
1126 
1127 	msg_freehdr(msghdr);
1128 	wakeup(msqptr);
1129 	td->td_retval[0] = msgsz;
1130 done2:
1131 	mtx_unlock(&msq_mtx);
1132 	return (error);
1133 }
1134 
1135 static int
1136 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1137 {
1138 
1139 	return (SYSCTL_OUT(req, msqids,
1140 	    sizeof(struct msqid_ds) * msginfo.msgmni));
1141 }
1142 
1143 SYSCTL_DECL(_kern_ipc);
1144 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
1145 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0, "");
1146 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
1147 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
1148 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0, "");
1149 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0, "");
1150 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1151     NULL, 0, sysctl_msqids, "", "Message queue IDs");
1152