xref: /freebsd/sys/kern/sysv_msg.c (revision b52b9d56d4e96089873a75f9e29062eec19fabba)
1 /* $FreeBSD$ */
2 
3 /*
4  * Implementation of SVID messages
5  *
6  * Author:  Daniel Boulet
7  *
8  * Copyright 1993 Daniel Boulet and RTMX Inc.
9  *
10  * This system call was implemented by Daniel Boulet under contract from RTMX.
11  *
12  * Redistribution and use in source forms, with and without modification,
13  * are permitted provided that this entire comment appears intact.
14  *
15  * Redistribution in binary form may occur without any restrictions.
16  * Obviously, it would be nice if you gave credit where credit is due
17  * but requiring it would be too onerous.
18  *
19  * This software is provided ``AS IS'' without any warranties of any kind.
20  */
21 
22 #include "opt_sysvipc.h"
23 
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/sysproto.h>
27 #include <sys/kernel.h>
28 #include <sys/proc.h>
29 #include <sys/lock.h>
30 #include <sys/mutex.h>
31 #include <sys/msg.h>
32 #include <sys/syscall.h>
33 #include <sys/sysent.h>
34 #include <sys/sysctl.h>
35 #include <sys/malloc.h>
36 #include <sys/jail.h>
37 
38 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
39 
40 static void msginit(void);
41 static int msgunload(void);
42 static int sysvmsg_modload(struct module *, int, void *);
43 
44 #ifdef MSG_DEBUG
45 #define DPRINTF(a)	printf a
46 #else
47 #define DPRINTF(a)
48 #endif
49 
50 static void msg_freehdr(struct msg *msghdr);
51 
52 /* XXX casting to (sy_call_t *) is bogus, as usual. */
53 static sy_call_t *msgcalls[] = {
54 	(sy_call_t *)msgctl, (sy_call_t *)msgget,
55 	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
56 };
57 
58 struct msg {
59 	struct	msg *msg_next;	/* next msg in the chain */
60 	long	msg_type;	/* type of this message */
61     				/* >0 -> type of this message */
62     				/* 0 -> free header */
63 	u_short	msg_ts;		/* size of this message */
64 	short	msg_spot;	/* location of start of msg in buffer */
65 };
66 
67 
/*
 * Default limits.  Each one may be overridden by defining the macro before
 * this point; MSGMAX is always derived from the segment size and count.
 */
#if !defined(MSGSSZ)
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif

#if !defined(MSGSEG)
#define MSGSEG	2048		/* must be less than 32767 */
#endif

#define MSGMAX	(MSGSSZ*MSGSEG)

#if !defined(MSGMNB)
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif

#if !defined(MSGMNI)
#define MSGMNI	40
#endif

#if !defined(MSGTQL)
#define MSGTQL	40
#endif
84 
85 /*
86  * Based on the configuration parameters described in an SVR2 (yes, two)
87  * config(1m) man page.
88  *
89  * Each message is broken up and stored in segments that are msgssz bytes
90  * long.  For efficiency reasons, this should be a power of two.  Also,
91  * it doesn't make sense if it is less than 8 or greater than about 256.
92  * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
93  * two between 8 and 1024 inclusive (and panic's if it isn't).
94  */
95 struct msginfo msginfo = {
96                 MSGMAX,         /* max chars in a message */
97                 MSGMNI,         /* # of message queue identifiers */
98                 MSGMNB,         /* max chars in a queue */
99                 MSGTQL,         /* max messages in system */
100                 MSGSSZ,         /* size of a message segment */
101                 		/* (must be small power of 2 greater than 4) */
102                 MSGSEG          /* number of message segments */
103 };
104 
/*
 * macros to convert between msqid_ds's and msqid's.
 * (specific to this implementation)
 *
 * A msqid packs the table index into the low 16 bits and the slot's
 * sequence number into the high 16 bits.
 *
 * The sequence number is converted to unsigned before it is shifted:
 * shifting a (promoted, signed) int left by 16 is undefined once the
 * value is 0x8000 or larger.  The index mask is parenthesised explicitly
 * rather than relying on '&' binding tighter than '|'.
 */
#define MSQID(ix,ds)	\
	(((ix) & 0xffff) | \
	    (((unsigned int)(ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
112 
113 /*
114  * The rest of this file is specific to this particular implementation.
115  */
116 
/*
 * One entry per message segment; entries are chained by index to form
 * either a message's segment list or the free-segment list.
 */
struct msgmap {
	/*
	 * next segment in buffer:
	 *   -1             -> available
	 *   0..(MSGSEG-1)  -> index of next segment
	 */
	short	next;
};

/* Is this msqid_ds locked?  (kept in msg_perm.mode) */
#define MSG_LOCKED	01000
124 
/* Backing storage, allocated by msginit() and released by msgunload(). */
static char *msgpool;			/* MSGMAX byte long msg buffer pool */
static struct msgmap *msgmaps;		/* MSGSEG msgmap structures */
static struct msg *msghdrs;		/* MSGTQL msg headers */
static struct msqid_ds *msqids;		/* MSGMNI msqid_ds struct's */

/* Free lists threaded through the arrays above. */
static struct msg *free_msghdrs;	/* list of free msg headers */
static short free_msgmaps;		/* head of linked list of free map entries */
static int nfree_msgmaps;		/* # of free map entries */
132 
133 static void
134 msginit()
135 {
136 	register int i;
137 
138 	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
139 	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
140 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
141 	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
142 
143 	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
144 	if (msgpool == NULL)
145 		panic("msgpool is NULL");
146 	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
147 	if (msgmaps == NULL)
148 		panic("msgmaps is NULL");
149 	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
150 	if (msghdrs == NULL)
151 		panic("msghdrs is NULL");
152 	msqids = malloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
153 	if (msqids == NULL)
154 		panic("msqids is NULL");
155 
156 	/*
157 	 * msginfo.msgssz should be a power of two for efficiency reasons.
158 	 * It is also pretty silly if msginfo.msgssz is less than 8
159 	 * or greater than about 256 so ...
160 	 */
161 
162 	i = 8;
163 	while (i < 1024 && i != msginfo.msgssz)
164 		i <<= 1;
165     	if (i != msginfo.msgssz) {
166 		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
167 		    msginfo.msgssz));
168 		panic("msginfo.msgssz not a small power of 2");
169 	}
170 
171 	if (msginfo.msgseg > 32767) {
172 		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
173 		panic("msginfo.msgseg > 32767");
174 	}
175 
176 	if (msgmaps == NULL)
177 		panic("msgmaps is NULL");
178 
179 	for (i = 0; i < msginfo.msgseg; i++) {
180 		if (i > 0)
181 			msgmaps[i-1].next = i;
182 		msgmaps[i].next = -1;	/* implies entry is available */
183 	}
184 	free_msgmaps = 0;
185 	nfree_msgmaps = msginfo.msgseg;
186 
187 	if (msghdrs == NULL)
188 		panic("msghdrs is NULL");
189 
190 	for (i = 0; i < msginfo.msgtql; i++) {
191 		msghdrs[i].msg_type = 0;
192 		if (i > 0)
193 			msghdrs[i-1].msg_next = &msghdrs[i];
194 		msghdrs[i].msg_next = NULL;
195     	}
196 	free_msghdrs = &msghdrs[0];
197 
198 	if (msqids == NULL)
199 		panic("msqids is NULL");
200 
201 	for (i = 0; i < msginfo.msgmni; i++) {
202 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
203 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
204 		msqids[i].msg_perm.mode = 0;
205 	}
206 }
207 
208 static int
209 msgunload()
210 {
211 	struct msqid_ds *msqptr;
212 	int msqid;
213 
214 	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
215 		/*
216 		 * Look for an unallocated and unlocked msqid_ds.
217 		 * msqid_ds's can be locked by msgsnd or msgrcv while
218 		 * they are copying the message in/out.  We can't
219 		 * re-use the entry until they release it.
220 		 */
221 		msqptr = &msqids[msqid];
222 		if (msqptr->msg_qbytes != 0 ||
223 		    (msqptr->msg_perm.mode & MSG_LOCKED) != 0)
224 			break;
225 	}
226 	if (msqid != msginfo.msgmni)
227 		return (EBUSY);
228 
229 	free(msgpool, M_MSG);
230 	free(msgmaps, M_MSG);
231 	free(msghdrs, M_MSG);
232 	free(msqids, M_MSG);
233 	return (0);
234 }
235 
236 
237 static int
238 sysvmsg_modload(struct module *module, int cmd, void *arg)
239 {
240 	int error = 0;
241 
242 	switch (cmd) {
243 	case MOD_LOAD:
244 		msginit();
245 		break;
246 	case MOD_UNLOAD:
247 		error = msgunload();
248 		break;
249 	case MOD_SHUTDOWN:
250 		break;
251 	default:
252 		error = EINVAL;
253 		break;
254 	}
255 	return (error);
256 }
257 
258 static moduledata_t sysvmsg_mod = {
259 	"sysvmsg",
260 	&sysvmsg_modload,
261 	NULL
262 };
263 
264 SYSCALL_MODULE_HELPER(msgsys);
265 SYSCALL_MODULE_HELPER(msgctl);
266 SYSCALL_MODULE_HELPER(msgget);
267 SYSCALL_MODULE_HELPER(msgsnd);
268 SYSCALL_MODULE_HELPER(msgrcv);
269 
270 DECLARE_MODULE(sysvmsg, sysvmsg_mod,
271 	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
272 MODULE_VERSION(sysvmsg, 1);
273 
274 /*
275  * Entry point for all MSG calls
276  *
277  * MPSAFE
278  */
279 int
280 msgsys(td, uap)
281 	struct thread *td;
282 	/* XXX actually varargs. */
283 	struct msgsys_args /* {
284 		u_int	which;
285 		int	a2;
286 		int	a3;
287 		int	a4;
288 		int	a5;
289 		int	a6;
290 	} */ *uap;
291 {
292 	int error;
293 
294 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
295 		return (ENOSYS);
296 	if (uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
297 		return (EINVAL);
298 	mtx_lock(&Giant);
299 	error = (*msgcalls[uap->which])(td, &uap->a2);
300 	mtx_unlock(&Giant);
301 	return (error);
302 }
303 
304 static void
305 msg_freehdr(msghdr)
306 	struct msg *msghdr;
307 {
308 	while (msghdr->msg_ts > 0) {
309 		short next;
310 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
311 			panic("msghdr->msg_spot out of range");
312 		next = msgmaps[msghdr->msg_spot].next;
313 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
314 		free_msgmaps = msghdr->msg_spot;
315 		nfree_msgmaps++;
316 		msghdr->msg_spot = next;
317 		if (msghdr->msg_ts >= msginfo.msgssz)
318 			msghdr->msg_ts -= msginfo.msgssz;
319 		else
320 			msghdr->msg_ts = 0;
321 	}
322 	if (msghdr->msg_spot != -1)
323 		panic("msghdr->msg_spot != -1");
324 	msghdr->msg_next = free_msghdrs;
325 	free_msghdrs = msghdr;
326 }
327 
328 #ifndef _SYS_SYSPROTO_H_
329 struct msgctl_args {
330 	int	msqid;
331 	int	cmd;
332 	struct	msqid_ds *buf;
333 };
334 #endif
335 
336 /*
337  * MPSAFE
338  */
339 int
340 msgctl(td, uap)
341 	struct thread *td;
342 	register struct msgctl_args *uap;
343 {
344 	int msqid = uap->msqid;
345 	int cmd = uap->cmd;
346 	struct msqid_ds *user_msqptr = uap->buf;
347 	int rval, error;
348 	struct msqid_ds msqbuf;
349 	register struct msqid_ds *msqptr;
350 
351 	DPRINTF(("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr));
352 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
353 		return (ENOSYS);
354 
355 	mtx_lock(&Giant);
356 	msqid = IPCID_TO_IX(msqid);
357 
358 	if (msqid < 0 || msqid >= msginfo.msgmni) {
359 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
360 		    msginfo.msgmni));
361 		error = EINVAL;
362 		goto done2;
363 	}
364 
365 	msqptr = &msqids[msqid];
366 
367 	if (msqptr->msg_qbytes == 0) {
368 		DPRINTF(("no such msqid\n"));
369 		error = EINVAL;
370 		goto done2;
371 	}
372 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
373 		DPRINTF(("wrong sequence number\n"));
374 		error = EINVAL;
375 		goto done2;
376 	}
377 
378 	error = 0;
379 	rval = 0;
380 
381 	switch (cmd) {
382 
383 	case IPC_RMID:
384 	{
385 		struct msg *msghdr;
386 		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
387 			goto done2;
388 		/* Free the message headers */
389 		msghdr = msqptr->msg_first;
390 		while (msghdr != NULL) {
391 			struct msg *msghdr_tmp;
392 
393 			/* Free the segments of each message */
394 			msqptr->msg_cbytes -= msghdr->msg_ts;
395 			msqptr->msg_qnum--;
396 			msghdr_tmp = msghdr;
397 			msghdr = msghdr->msg_next;
398 			msg_freehdr(msghdr_tmp);
399 		}
400 
401 		if (msqptr->msg_cbytes != 0)
402 			panic("msg_cbytes is screwed up");
403 		if (msqptr->msg_qnum != 0)
404 			panic("msg_qnum is screwed up");
405 
406 		msqptr->msg_qbytes = 0;	/* Mark it as free */
407 
408 		wakeup(msqptr);
409 	}
410 
411 		break;
412 
413 	case IPC_SET:
414 		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
415 			goto done2;
416 		if ((error = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
417 			goto done2;
418 		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
419 			error = suser(td);
420 			if (error)
421 				goto done2;
422 		}
423 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
424 			DPRINTF(("can't increase msg_qbytes beyond %d"
425 			    "(truncating)\n", msginfo.msgmnb));
426 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
427 		}
428 		if (msqbuf.msg_qbytes == 0) {
429 			DPRINTF(("can't reduce msg_qbytes to 0\n"));
430 			error = EINVAL;		/* non-standard errno! */
431 			goto done2;
432 		}
433 		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
434 		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
435 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
436 		    (msqbuf.msg_perm.mode & 0777);
437 		msqptr->msg_qbytes = msqbuf.msg_qbytes;
438 		msqptr->msg_ctime = time_second;
439 		break;
440 
441 	case IPC_STAT:
442 		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
443 			DPRINTF(("requester doesn't have read access\n"));
444 			goto done2;
445 		}
446 		error = copyout(msqptr, user_msqptr, sizeof(struct msqid_ds));
447 		break;
448 
449 	default:
450 		DPRINTF(("invalid command %d\n", cmd));
451 		error = EINVAL;
452 		goto done2;
453 	}
454 
455 	if (error == 0)
456 		td->td_retval[0] = rval;
457 done2:
458 	mtx_unlock(&Giant);
459 	return(error);
460 }
461 
462 #ifndef _SYS_SYSPROTO_H_
463 struct msgget_args {
464 	key_t	key;
465 	int	msgflg;
466 };
467 #endif
468 
469 /*
470  * MPSAFE
471  */
472 int
473 msgget(td, uap)
474 	struct thread *td;
475 	register struct msgget_args *uap;
476 {
477 	int msqid, error = 0;
478 	int key = uap->key;
479 	int msgflg = uap->msgflg;
480 	struct ucred *cred = td->td_ucred;
481 	register struct msqid_ds *msqptr = NULL;
482 
483 	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
484 
485 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
486 		return (ENOSYS);
487 
488 	mtx_lock(&Giant);
489 	if (key != IPC_PRIVATE) {
490 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
491 			msqptr = &msqids[msqid];
492 			if (msqptr->msg_qbytes != 0 &&
493 			    msqptr->msg_perm.key == key)
494 				break;
495 		}
496 		if (msqid < msginfo.msgmni) {
497 			DPRINTF(("found public key\n"));
498 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
499 				DPRINTF(("not exclusive\n"));
500 				error = EEXIST;
501 				goto done2;
502 			}
503 			if ((error = ipcperm(td, &msqptr->msg_perm, msgflg & 0700 ))) {
504 				DPRINTF(("requester doesn't have 0%o access\n",
505 				    msgflg & 0700));
506 				goto done2;
507 			}
508 			goto found;
509 		}
510 	}
511 
512 	DPRINTF(("need to allocate the msqid_ds\n"));
513 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
514 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
515 			/*
516 			 * Look for an unallocated and unlocked msqid_ds.
517 			 * msqid_ds's can be locked by msgsnd or msgrcv while
518 			 * they are copying the message in/out.  We can't
519 			 * re-use the entry until they release it.
520 			 */
521 			msqptr = &msqids[msqid];
522 			if (msqptr->msg_qbytes == 0 &&
523 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
524 				break;
525 		}
526 		if (msqid == msginfo.msgmni) {
527 			DPRINTF(("no more msqid_ds's available\n"));
528 			error = ENOSPC;
529 			goto done2;
530 		}
531 		DPRINTF(("msqid %d is available\n", msqid));
532 		msqptr->msg_perm.key = key;
533 		msqptr->msg_perm.cuid = cred->cr_uid;
534 		msqptr->msg_perm.uid = cred->cr_uid;
535 		msqptr->msg_perm.cgid = cred->cr_gid;
536 		msqptr->msg_perm.gid = cred->cr_gid;
537 		msqptr->msg_perm.mode = (msgflg & 0777);
538 		/* Make sure that the returned msqid is unique */
539 		msqptr->msg_perm.seq++;
540 		msqptr->msg_first = NULL;
541 		msqptr->msg_last = NULL;
542 		msqptr->msg_cbytes = 0;
543 		msqptr->msg_qnum = 0;
544 		msqptr->msg_qbytes = msginfo.msgmnb;
545 		msqptr->msg_lspid = 0;
546 		msqptr->msg_lrpid = 0;
547 		msqptr->msg_stime = 0;
548 		msqptr->msg_rtime = 0;
549 		msqptr->msg_ctime = time_second;
550 	} else {
551 		DPRINTF(("didn't find it and wasn't asked to create it\n"));
552 		error = ENOENT;
553 		goto done2;
554 	}
555 
556 found:
557 	/* Construct the unique msqid */
558 	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
559 done2:
560 	mtx_unlock(&Giant);
561 	return (error);
562 }
563 
564 #ifndef _SYS_SYSPROTO_H_
565 struct msgsnd_args {
566 	int	msqid;
567 	void	*msgp;
568 	size_t	msgsz;
569 	int	msgflg;
570 };
571 #endif
572 
573 /*
574  * MPSAFE
 *
 * msgsnd system call: append one message to a queue.
 *
 * The user buffer at uap->msgp starts with a long message type (which
 * must be >= 1) followed by uap->msgsz bytes of message text.  The text
 * is stored in the shared pool in msginfo.msgssz-byte segments.
 *
 * Unless IPC_NOWAIT is set, the call sleeps until the queue is not
 * MSG_LOCKED, the queue has room for msgsz more bytes, enough free
 * segments exist and a free message header exists.
 *
 * On success td_retval[0] is set to 0 and 0 is returned.  Errors:
 *   ENOSYS  jailed caller and jail_sysvipc_allowed is clear
 *   EINVAL  bad index, unallocated slot, sequence mismatch,
 *           msgsz > msg_qbytes, or message type < 1
 *   EAGAIN  resources unavailable and IPC_NOWAIT was given
 *   EINTR   tsleep() returned any non-zero value
 *   EIDRM   the queue was removed while sleeping or copying in
 *   other   whatever ipcperm() or copyin() return
575  */
576 int
577 msgsnd(td, uap)
578 	struct thread *td;
579 	register struct msgsnd_args *uap;
580 {
581 	int msqid = uap->msqid;
582 	void *user_msgp = uap->msgp;
583 	size_t msgsz = uap->msgsz;
584 	int msgflg = uap->msgflg;
585 	int segs_needed, error = 0;
586 	register struct msqid_ds *msqptr;
587 	register struct msg *msghdr;
588 	short next;
589 
590 	DPRINTF(("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
591 	    msgflg));
592 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
593 		return (ENOSYS);
594 
595 	mtx_lock(&Giant);
	/* From here on msqid is the table index; uap->msqid keeps the full id. */
596 	msqid = IPCID_TO_IX(msqid);
597 
598 	if (msqid < 0 || msqid >= msginfo.msgmni) {
599 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
600 		    msginfo.msgmni));
601 		error = EINVAL;
602 		goto done2;
603 	}
604 
605 	msqptr = &msqids[msqid];
606 	if (msqptr->msg_qbytes == 0) {
607 		DPRINTF(("no such message queue id\n"));
608 		error = EINVAL;
609 		goto done2;
610 	}
611 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
612 		DPRINTF(("wrong sequence number\n"));
613 		error = EINVAL;
614 		goto done2;
615 	}
616 
617 	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_W))) {
618 		DPRINTF(("requester doesn't have write access\n"));
619 		goto done2;
620 	}
621 
	/* Number of msgssz-byte segments needed, rounding up. */
622 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
623 	DPRINTF(("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
624 	    segs_needed));
	/* Wait loop: leave via break once every resource is available. */
625 	for (;;) {
626 		int need_more_resources = 0;
627 
628 		/*
629 		 * check msgsz
630 		 * (inside this loop in case msg_qbytes changes while we sleep)
631 		 */
632 
633 		if (msgsz > msqptr->msg_qbytes) {
634 			DPRINTF(("msgsz > msqptr->msg_qbytes\n"));
635 			error = EINVAL;
636 			goto done2;
637 		}
638 
639 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
640 			DPRINTF(("msqid is locked\n"));
641 			need_more_resources = 1;
642 		}
643 		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
644 			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
645 			need_more_resources = 1;
646 		}
647 		if (segs_needed > nfree_msgmaps) {
648 			DPRINTF(("segs_needed > nfree_msgmaps\n"));
649 			need_more_resources = 1;
650 		}
651 		if (free_msghdrs == NULL) {
652 			DPRINTF(("no more msghdrs\n"));
653 			need_more_resources = 1;
654 		}
655 
656 		if (need_more_resources) {
657 			int we_own_it;
658 
659 			if ((msgflg & IPC_NOWAIT) != 0) {
660 				DPRINTF(("need more resources but caller "
661 				    "doesn't want to wait\n"));
662 				error = EAGAIN;
663 				goto done2;
664 			}
665 
666 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
667 				DPRINTF(("we don't own the msqid_ds\n"));
668 				we_own_it = 0;
669 			} else {
670 				/* Force later arrivals to wait for our
671 				   request */
672 				DPRINTF(("we own the msqid_ds\n"));
673 				msqptr->msg_perm.mode |= MSG_LOCKED;
674 				we_own_it = 1;
675 			}
676 			DPRINTF(("goodnight\n"));
677 			error = tsleep(msqptr, (PZERO - 4) | PCATCH,
678 			    "msgwait", 0);
679 			DPRINTF(("good morning, error=%d\n", error));
			/* Drop the lock only if this thread was the one that set it. */
680 			if (we_own_it)
681 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
			/* Every non-zero tsleep() result is reported as EINTR. */
682 			if (error != 0) {
683 				DPRINTF(("msgsnd:  interrupted system call\n"));
684 				error = EINTR;
685 				goto done2;
686 			}
687 
688 			/*
689 			 * Make sure that the msq queue still exists
690 			 */
691 
692 			if (msqptr->msg_qbytes == 0) {
693 				DPRINTF(("msqid deleted\n"));
694 				error = EIDRM;
695 				goto done2;
696 			}
697 
698 		} else {
699 			DPRINTF(("got all the resources that we need\n"));
700 			break;
701 		}
702 	}
703 
704 	/*
705 	 * We have the resources that we need.
706 	 * Make sure!
707 	 */
708 
709 	if (msqptr->msg_perm.mode & MSG_LOCKED)
710 		panic("msg_perm.mode & MSG_LOCKED");
711 	if (segs_needed > nfree_msgmaps)
712 		panic("segs_needed > nfree_msgmaps");
713 	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
714 		panic("msgsz + msg_cbytes > msg_qbytes");
715 	if (free_msghdrs == NULL)
716 		panic("no more msghdrs");
717 
718 	/*
719 	 * Re-lock the msqid_ds in case we page-fault when copying in the
720 	 * message
721 	 */
722 
723 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
724 		panic("msqid_ds is already locked");
725 	msqptr->msg_perm.mode |= MSG_LOCKED;
726 
727 	/*
728 	 * Allocate a message header
729 	 */
730 
731 	msghdr = free_msghdrs;
732 	free_msghdrs = msghdr->msg_next;
	/* -1 terminates the (so far empty) segment chain. */
733 	msghdr->msg_spot = -1;
734 	msghdr->msg_ts = msgsz;
735 
736 	/*
737 	 * Allocate space for the message
738 	 */
739 
	/* Each segment popped from the free list is pushed on the front of the message's chain. */
740 	while (segs_needed > 0) {
741 		if (nfree_msgmaps <= 0)
742 			panic("not enough msgmaps");
743 		if (free_msgmaps == -1)
744 			panic("nil free_msgmaps");
745 		next = free_msgmaps;
746 		if (next <= -1)
747 			panic("next too low #1");
748 		if (next >= msginfo.msgseg)
749 			panic("next out of range #1");
750 		DPRINTF(("allocating segment %d to message\n", next));
751 		free_msgmaps = msgmaps[next].next;
752 		nfree_msgmaps--;
753 		msgmaps[next].next = msghdr->msg_spot;
754 		msghdr->msg_spot = next;
755 		segs_needed--;
756 	}
757 
758 	/*
759 	 * Copy in the message type
760 	 */
761 
	/* Every failure path below frees the header, unlocks the queue and wakes sleepers. */
762 	if ((error = copyin(user_msgp, &msghdr->msg_type,
763 	    sizeof(msghdr->msg_type))) != 0) {
764 		DPRINTF(("error %d copying the message type\n", error));
765 		msg_freehdr(msghdr);
766 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
767 		wakeup(msqptr);
768 		goto done2;
769 	}
	/* The message text follows the type in the user buffer. */
770 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
771 
772 	/*
773 	 * Validate the message type
774 	 */
775 
776 	if (msghdr->msg_type < 1) {
777 		msg_freehdr(msghdr);
778 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
779 		wakeup(msqptr);
780 		DPRINTF(("mtype (%d) < 1\n", msghdr->msg_type));
781 		error = EINVAL;
782 		goto done2;
783 	}
784 
785 	/*
786 	 * Copy in the message body
787 	 */
788 
789 	next = msghdr->msg_spot;
790 	while (msgsz > 0) {
791 		size_t tlen;
		/* Copy at most one segment's worth per iteration. */
792 		if (msgsz > msginfo.msgssz)
793 			tlen = msginfo.msgssz;
794 		else
795 			tlen = msgsz;
796 		if (next <= -1)
797 			panic("next too low #2");
798 		if (next >= msginfo.msgseg)
799 			panic("next out of range #2");
800 		if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
801 		    tlen)) != 0) {
802 			DPRINTF(("error %d copying in message segment\n",
803 			    error));
804 			msg_freehdr(msghdr);
805 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
806 			wakeup(msqptr);
807 			goto done2;
808 		}
809 		msgsz -= tlen;
810 		user_msgp = (char *)user_msgp + tlen;
811 		next = msgmaps[next].next;
812 	}
813 	if (next != -1)
814 		panic("didn't use all the msg segments");
815 
816 	/*
817 	 * We've got the message.  Unlock the msqid_ds.
818 	 */
819 
820 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
821 
822 	/*
823 	 * Make sure that the msqid_ds is still allocated.
824 	 */
825 
826 	if (msqptr->msg_qbytes == 0) {
827 		msg_freehdr(msghdr);
828 		wakeup(msqptr);
829 		error = EIDRM;
830 		goto done2;
831 	}
832 
833 	/*
834 	 * Put the message into the queue
835 	 */
836 
837 	if (msqptr->msg_first == NULL) {
838 		msqptr->msg_first = msghdr;
839 		msqptr->msg_last = msghdr;
840 	} else {
841 		msqptr->msg_last->msg_next = msghdr;
842 		msqptr->msg_last = msghdr;
843 	}
844 	msqptr->msg_last->msg_next = NULL;
845 
	/* Note: msgsz was consumed by the copy loop, so msg_ts holds the size. */
846 	msqptr->msg_cbytes += msghdr->msg_ts;
847 	msqptr->msg_qnum++;
848 	msqptr->msg_lspid = td->td_proc->p_pid;
849 	msqptr->msg_stime = time_second;
850 
	/* Wake anything sleeping on this queue (receivers and blocked senders). */
851 	wakeup(msqptr);
852 	td->td_retval[0] = 0;
853 done2:
854 	mtx_unlock(&Giant);
855 	return (error);
856 }
857 
858 #ifndef _SYS_SYSPROTO_H_
859 struct msgrcv_args {
860 	int	msqid;
861 	void	*msgp;
862 	size_t	msgsz;
863 	long	msgtyp;
864 	int	msgflg;
865 };
866 #endif
867 
868 /*
869  * MPSAFE
 *
 * msgrcv system call: remove one message from a queue and copy it out.
 *
 * Selection by uap->msgtyp:
 *   0   the first message on the queue
 *   >0  the first message whose type equals msgtyp
 *   <0  the first message (in queue order) whose type is <= -msgtyp
 *
 * The user buffer at uap->msgp receives the long message type followed
 * by at most uap->msgsz bytes of text.  A longer message fails with
 * E2BIG unless MSG_NOERROR is set, in which case it is truncated.
 *
 * On success td_retval[0] holds the number of text bytes copied out.
 * Errors:
 *   ENOSYS  jailed caller and jail_sysvipc_allowed is clear
 *   EINVAL  bad index, unallocated slot, or sequence mismatch
 *   E2BIG   selected message is larger than msgsz (no MSG_NOERROR)
 *   ENOMSG  no matching message and IPC_NOWAIT was given
 *   EINTR   tsleep() returned any non-zero value
 *   EIDRM   the queue was removed (or its slot re-used) while sleeping
 *   other   whatever ipcperm() or copyout() return
 *
 * Note that a copyout() failure happens after the message has been
 * unlinked from the queue; the message is freed, not put back.
870  */
871 int
872 msgrcv(td, uap)
873 	struct thread *td;
874 	register struct msgrcv_args *uap;
875 {
876 	int msqid = uap->msqid;
877 	void *user_msgp = uap->msgp;
878 	size_t msgsz = uap->msgsz;
879 	long msgtyp = uap->msgtyp;
880 	int msgflg = uap->msgflg;
881 	size_t len;
882 	register struct msqid_ds *msqptr;
883 	register struct msg *msghdr;
884 	int error = 0;
885 	short next;
886 
887 	DPRINTF(("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
888 	    msgsz, msgtyp, msgflg));
889 
890 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
891 		return (ENOSYS);
892 
893 	mtx_lock(&Giant);
	/* From here on msqid is the table index; uap->msqid keeps the full id. */
894 	msqid = IPCID_TO_IX(msqid);
895 
896 	if (msqid < 0 || msqid >= msginfo.msgmni) {
897 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
898 		    msginfo.msgmni));
899 		error = EINVAL;
900 		goto done2;
901 	}
902 
903 	msqptr = &msqids[msqid];
904 	if (msqptr->msg_qbytes == 0) {
905 		DPRINTF(("no such message queue id\n"));
906 		error = EINVAL;
907 		goto done2;
908 	}
909 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
910 		DPRINTF(("wrong sequence number\n"));
911 		error = EINVAL;
912 		goto done2;
913 	}
914 
915 	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
916 		DPRINTF(("requester doesn't have read access\n"));
917 		goto done2;
918 	}
919 
	/* Loop until a matching message has been unlinked into msghdr. */
920 	msghdr = NULL;
921 	while (msghdr == NULL) {
922 		if (msgtyp == 0) {
			/* Any type will do: take the head of the queue. */
923 			msghdr = msqptr->msg_first;
924 			if (msghdr != NULL) {
925 				if (msgsz < msghdr->msg_ts &&
926 				    (msgflg & MSG_NOERROR) == 0) {
927 					DPRINTF(("first message on the queue "
928 					    "is too big (want %d, got %d)\n",
929 					    msgsz, msghdr->msg_ts));
930 					error = E2BIG;
931 					goto done2;
932 				}
933 				if (msqptr->msg_first == msqptr->msg_last) {
934 					msqptr->msg_first = NULL;
935 					msqptr->msg_last = NULL;
936 				} else {
937 					msqptr->msg_first = msghdr->msg_next;
938 					if (msqptr->msg_first == NULL)
939 						panic("msg_first/last screwed up #1");
940 				}
941 			}
942 		} else {
			/*
			 * prev points at the link that refers to the current
			 * message so it can be unlinked in place; previous is
			 * the message before it (NULL at the head).
			 */
943 			struct msg *previous;
944 			struct msg **prev;
945 
946 			previous = NULL;
947 			prev = &(msqptr->msg_first);
948 			while ((msghdr = *prev) != NULL) {
949 				/*
950 				 * Is this message's type an exact match or is
951 				 * this message's type less than or equal to
952 				 * the absolute value of a negative msgtyp?
953 				 * Note that the second half of this test can
954 				 * NEVER be true if msgtyp is positive since
955 				 * msg_type is always positive!
956 				 */
957 
958 				if (msgtyp == msghdr->msg_type ||
959 				    msghdr->msg_type <= -msgtyp) {
960 					DPRINTF(("found message type %d, "
961 					    "requested %d\n",
962 					    msghdr->msg_type, msgtyp));
963 					if (msgsz < msghdr->msg_ts &&
964 					    (msgflg & MSG_NOERROR) == 0) {
965 						DPRINTF(("requested message "
966 						    "on the queue is too big "
967 						    "(want %d, got %d)\n",
968 						    msgsz, msghdr->msg_ts));
969 						error = E2BIG;
970 						goto done2;
971 					}
972 					*prev = msghdr->msg_next;
					/* Removing the tail: msg_last must be moved back. */
973 					if (msghdr == msqptr->msg_last) {
974 						if (previous == NULL) {
975 							if (prev !=
976 							    &msqptr->msg_first)
977 								panic("msg_first/last screwed up #2");
978 							msqptr->msg_first =
979 							    NULL;
980 							msqptr->msg_last =
981 							    NULL;
982 						} else {
983 							if (prev ==
984 							    &msqptr->msg_first)
985 								panic("msg_first/last screwed up #3");
986 							msqptr->msg_last =
987 							    previous;
988 						}
989 					}
990 					break;
991 				}
992 				previous = msghdr;
993 				prev = &(msghdr->msg_next);
994 			}
995 		}
996 
997 		/*
998 		 * We've either extracted the msghdr for the appropriate
999 		 * message or there isn't one.
1000 		 * If there is one then bail out of this loop.
1001 		 */
1002 
1003 		if (msghdr != NULL)
1004 			break;
1005 
1006 		/*
1007 		 * Hmph!  No message found.  Does the user want to wait?
1008 		 */
1009 
1010 		if ((msgflg & IPC_NOWAIT) != 0) {
1011 			DPRINTF(("no appropriate message found (msgtyp=%d)\n",
1012 			    msgtyp));
1013 			/* The SVID says to return ENOMSG. */
1014 			error = ENOMSG;
1015 			goto done2;
1016 		}
1017 
1018 		/*
1019 		 * Wait for something to happen
1020 		 */
1021 
1022 		DPRINTF(("msgrcv:  goodnight\n"));
1023 		error = tsleep(msqptr, (PZERO - 4) | PCATCH, "msgwait", 0);
1024 		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1025 
		/*
		 * Every non-zero tsleep() result is reported as EINTR.
		 * NOTE(review): the debug text below says "msgsnd" although
		 * this is msgrcv.
		 */
1026 		if (error != 0) {
1027 			DPRINTF(("msgsnd:  interrupted system call\n"));
1028 			error = EINTR;
1029 			goto done2;
1030 		}
1031 
1032 		/*
1033 		 * Make sure that the msq queue still exists
1034 		 */
1035 
1036 		if (msqptr->msg_qbytes == 0 ||
1037 		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1038 			DPRINTF(("msqid deleted\n"));
1039 			error = EIDRM;
1040 			goto done2;
1041 		}
1042 	}
1043 
1044 	/*
1045 	 * Return the message to the user.
1046 	 *
1047 	 * First, do the bookkeeping (before we risk being interrupted).
1048 	 */
1049 
1050 	msqptr->msg_cbytes -= msghdr->msg_ts;
1051 	msqptr->msg_qnum--;
1052 	msqptr->msg_lrpid = td->td_proc->p_pid;
1053 	msqptr->msg_rtime = time_second;
1054 
1055 	/*
1056 	 * Make msgsz the actual amount that we'll be returning.
1057 	 * Note that this effectively truncates the message if it is too long
1058 	 * (since msgsz is never increased).
1059 	 */
1060 
1061 	DPRINTF(("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1062 	    msghdr->msg_ts));
1063 	if (msgsz > msghdr->msg_ts)
1064 		msgsz = msghdr->msg_ts;
1065 
1066 	/*
1067 	 * Return the type to the user.
1068 	 */
1069 
1070 	error = copyout(&(msghdr->msg_type), user_msgp,
1071 	    sizeof(msghdr->msg_type));
1072 	if (error != 0) {
1073 		DPRINTF(("error (%d) copying out message type\n", error));
1074 		msg_freehdr(msghdr);
1075 		wakeup(msqptr);
1076 		goto done2;
1077 	}
	/* The message text goes right after the type in the user buffer. */
1078 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1079 
1080 	/*
1081 	 * Return the segments to the user
1082 	 */
1083 
1084 	next = msghdr->msg_spot;
1085 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1086 		size_t tlen;
1087 
		/* Copy at most one segment's worth per iteration. */
1088 		if (msgsz - len > msginfo.msgssz)
1089 			tlen = msginfo.msgssz;
1090 		else
1091 			tlen = msgsz - len;
1092 		if (next <= -1)
1093 			panic("next too low #3");
1094 		if (next >= msginfo.msgseg)
1095 			panic("next out of range #3");
1096 		error = copyout(&msgpool[next * msginfo.msgssz],
1097 		    user_msgp, tlen);
1098 		if (error != 0) {
1099 			DPRINTF(("error (%d) copying out message segment\n",
1100 			    error));
1101 			msg_freehdr(msghdr);
1102 			wakeup(msqptr);
1103 			goto done2;
1104 		}
1105 		user_msgp = (char *)user_msgp + tlen;
1106 		next = msgmaps[next].next;
1107 	}
1108 
1109 	/*
1110 	 * Done, return the actual number of bytes copied out.
1111 	 */
1112 
	/* Freeing the message releases resources, so wake anything sleeping on this queue. */
1113 	msg_freehdr(msghdr);
1114 	wakeup(msqptr);
1115 	td->td_retval[0] = msgsz;
1116 done2:
1117 	mtx_unlock(&Giant);
1118 	return (error);
1119 }
1120 
1121 static int
1122 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1123 {
1124 
1125 	return (SYSCTL_OUT(req, msqids,
1126 	    sizeof(struct msqid_ds) * msginfo.msgmni));
1127 }
1128 
1129 SYSCTL_DECL(_kern_ipc);
1130 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
1131 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0, "");
1132 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
1133 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
1134 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0, "");
1135 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0, "");
1136 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1137     NULL, 0, sysctl_msqids, "", "Message queue IDs");
1138