xref: /freebsd/sys/kern/sysv_msg.c (revision 2546665afcaf0d53dc2c7058fee96354b3680f5a)
1 /*
2  * Implementation of SVID messages
3  *
4  * Author:  Daniel Boulet
5  *
6  * Copyright 1993 Daniel Boulet and RTMX Inc.
7  *
8  * This system call was implemented by Daniel Boulet under contract from RTMX.
9  *
10  * Redistribution and use in source forms, with and without modification,
11  * are permitted provided that this entire comment appears intact.
12  *
13  * Redistribution in binary form may occur without any restrictions.
14  * Obviously, it would be nice if you gave credit where credit is due
15  * but requiring it would be too onerous.
16  *
17  * This software is provided ``AS IS'' without any warranties of any kind.
18  */
19 
20 #include <sys/cdefs.h>
21 __FBSDID("$FreeBSD$");
22 
23 #include "opt_sysvipc.h"
24 
25 #include <sys/param.h>
26 #include <sys/systm.h>
27 #include <sys/sysproto.h>
28 #include <sys/kernel.h>
29 #include <sys/proc.h>
30 #include <sys/lock.h>
31 #include <sys/mutex.h>
32 #include <sys/module.h>
33 #include <sys/msg.h>
34 #include <sys/syscall.h>
35 #include <sys/sysent.h>
36 #include <sys/sysctl.h>
37 #include <sys/malloc.h>
38 #include <sys/jail.h>
39 
/* malloc(9) type under which every allocation made by this file is accounted. */
40 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
41 
/* Module life-cycle helpers; all three are defined later in this file. */
42 static void msginit(void);
43 static int msgunload(void);
44 static int sysvmsg_modload(struct module *, int, void *);
45 
/*
 * Debug tracing.  DPRINTF takes a single argument that is itself a fully
 * parenthesised printf argument list, e.g. DPRINTF(("x=%d\n", x)).  It
 * expands to nothing unless MSG_DEBUG is defined.
 */
46 #ifdef MSG_DEBUG
47 #define DPRINTF(a)	printf a
48 #else
49 #define DPRINTF(a)
50 #endif
51 
/* Returns a message header and all of its segments to the free lists. */
52 static void msg_freehdr(struct msg *msghdr);
53 
/*
 * Dispatch table used by msgsys(), which indexes it with its `which'
 * argument; the order of the entries therefore matters.
 */
54 /* XXX casting to (sy_call_t *) is bogus, as usual. */
55 static sy_call_t *msgcalls[] = {
56 	(sy_call_t *)msgctl, (sy_call_t *)msgget,
57 	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
58 };
59 
/*
 * Header for one message.  Headers live in the msghdrs array and are linked
 * either onto a queue (msg_first/msg_last of a msqid_ds) or onto the
 * free_msghdrs list via msg_next.  The message body is not stored here: it
 * occupies a chain of segments in msgpool, linked through msgmaps and
 * starting at index msg_spot.
 */
60 struct msg {
61 	struct	msg *msg_next;	/* next msg in the chain */
62 	long	msg_type;	/* type of this message */
63     				/* >0 -> type of this message */
64     				/* 0 -> free header */
65 	u_short	msg_ts;		/* size of this message, in bytes */
66 	short	msg_spot;	/* index of first segment; -1 when none */
67 };
68 
69 
/*
 * Compile-time defaults for the limits stored in msginfo.  Each one (except
 * MSGMAX, which is derived) can be overridden by defining the macro before
 * this point.
 */
70 #ifndef MSGSSZ
71 #define MSGSSZ	8		/* Each segment must be 2^N long */
72 #endif
73 #ifndef MSGSEG
74 #define MSGSEG	2048		/* must not exceed 32767 (msginit panics above that) */
75 #endif
76 #define MSGMAX	(MSGSSZ*MSGSEG)	/* size of the whole segment pool, in bytes */
77 #ifndef MSGMNB
78 #define MSGMNB	2048		/* max # of bytes in a queue */
79 #endif
80 #ifndef MSGMNI
81 #define MSGMNI	40		/* # of message queue identifiers */
82 #endif
83 #ifndef MSGTQL
84 #define MSGTQL	40		/* max # of messages in the system */
85 #endif
86 
87 /*
88  * Based on the configuration parameters described in an SVR2 (yes, two)
89  * config(1m) man page.
90  *
91  * Each message is broken up and stored in segments that are msgssz bytes
92  * long.  For efficiency reasons, this should be a power of two.  Segment
93  * sizes below 8 bytes or above a few hundred bytes make little sense, so
94  * msginit() (later in this file) panics unless msgssz is a power of two
95  * between 8 and 1024 inclusive.
96  */
97 struct msginfo msginfo = {
98                 MSGMAX,         /* max chars in a message */
99                 MSGMNI,         /* # of message queue identifiers */
100                 MSGMNB,         /* max chars in a queue */
101                 MSGTQL,         /* max messages in system */
102                 MSGSSZ,         /* size of a message segment */
103                 		/* (must be a power of 2 from 8 to 1024) */
104                 MSGSEG          /* number of message segments */
105 };
106 
107 /*
108  * macros to convert between msqid_ds's and msqid's.
109  * (specific to this implementation)
110  *
111  * An id packs the table index into its low 16 bits and the slot's sequence
112  * number (msg_perm.seq) into the next 16 bits.  MSQID builds an id from an
113  * index and a msqid_ds; MSQID_IX and MSQID_SEQ take it apart again.
114  *
115  * NOTE(review): the system calls visible in this file use IPCID_TO_IX,
116  * IPCID_TO_SEQ and IXSEQ_TO_IPCID instead of these macros -- confirm
117  * whether these are still needed.
118  */
110 #define MSQID(ix,ds)	((ix) & 0xffff | (((ds).msg_perm.seq << 16) & 0xffff0000))
111 #define MSQID_IX(id)	((id) & 0xffff)
112 #define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
114 
115 /*
116  * The rest of this file is specific to this particular implementation.
117  */
118 
/*
 * One entry per segment of msgpool.  Entries form singly linked chains by
 * segment index: the free list (headed by free_msgmaps) and one chain per
 * stored message (headed by that message's msg_spot).
 */
119 struct msgmap {
120 	short	next;		/* next segment in the chain */
121     				/* -1 -> end of chain */
122     				/* 0..(msginfo.msgseg-1) -> index of next segment */
123 };
124 
/*
 * Flag kept in msg_perm.mode (outside the 0777 permission bits).  msgsnd
 * sets it while it sleeps for resources or copies a message in with the
 * mutex dropped; msgget and msgunload treat a slot with this bit set as busy.
 */
125 #define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
126 
/* All of the state below is set up by msginit() and released by msgunload(). */
127 static int nfree_msgmaps;	/* # of free map entries */
128 static short free_msgmaps;	/* head of linked list of free map entries */
129 static struct msg *free_msghdrs;/* list of free msg headers */
130 static char *msgpool;		/* msginfo.msgmax byte long msg buffer pool */
131 static struct msgmap *msgmaps;	/* msginfo.msgseg msgmap structures */
132 static struct msg *msghdrs;	/* msginfo.msgtql msg headers */
133 static struct msqid_ds *msqids;	/* msginfo.msgmni msqid_ds struct's */
134 static struct mtx msq_mtx;	/* global mutex for message queues. */
135 
136 static void
137 msginit()
138 {
139 	register int i;
140 
141 	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
142 	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
143 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
144 	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
145 
146 	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
147 	if (msgpool == NULL)
148 		panic("msgpool is NULL");
149 	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
150 	if (msgmaps == NULL)
151 		panic("msgmaps is NULL");
152 	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
153 	if (msghdrs == NULL)
154 		panic("msghdrs is NULL");
155 	msqids = malloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
156 	if (msqids == NULL)
157 		panic("msqids is NULL");
158 
159 	/*
160 	 * msginfo.msgssz should be a power of two for efficiency reasons.
161 	 * It is also pretty silly if msginfo.msgssz is less than 8
162 	 * or greater than about 256 so ...
163 	 */
164 
165 	i = 8;
166 	while (i < 1024 && i != msginfo.msgssz)
167 		i <<= 1;
168     	if (i != msginfo.msgssz) {
169 		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
170 		    msginfo.msgssz));
171 		panic("msginfo.msgssz not a small power of 2");
172 	}
173 
174 	if (msginfo.msgseg > 32767) {
175 		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
176 		panic("msginfo.msgseg > 32767");
177 	}
178 
179 	if (msgmaps == NULL)
180 		panic("msgmaps is NULL");
181 
182 	for (i = 0; i < msginfo.msgseg; i++) {
183 		if (i > 0)
184 			msgmaps[i-1].next = i;
185 		msgmaps[i].next = -1;	/* implies entry is available */
186 	}
187 	free_msgmaps = 0;
188 	nfree_msgmaps = msginfo.msgseg;
189 
190 	if (msghdrs == NULL)
191 		panic("msghdrs is NULL");
192 
193 	for (i = 0; i < msginfo.msgtql; i++) {
194 		msghdrs[i].msg_type = 0;
195 		if (i > 0)
196 			msghdrs[i-1].msg_next = &msghdrs[i];
197 		msghdrs[i].msg_next = NULL;
198     	}
199 	free_msghdrs = &msghdrs[0];
200 
201 	if (msqids == NULL)
202 		panic("msqids is NULL");
203 
204 	for (i = 0; i < msginfo.msgmni; i++) {
205 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
206 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
207 		msqids[i].msg_perm.mode = 0;
208 	}
209 	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
210 }
211 
212 static int
213 msgunload()
214 {
215 	struct msqid_ds *msqptr;
216 	int msqid;
217 
218 	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
219 		/*
220 		 * Look for an unallocated and unlocked msqid_ds.
221 		 * msqid_ds's can be locked by msgsnd or msgrcv while
222 		 * they are copying the message in/out.  We can't
223 		 * re-use the entry until they release it.
224 		 */
225 		msqptr = &msqids[msqid];
226 		if (msqptr->msg_qbytes != 0 ||
227 		    (msqptr->msg_perm.mode & MSG_LOCKED) != 0)
228 			break;
229 	}
230 	if (msqid != msginfo.msgmni)
231 		return (EBUSY);
232 
233 	free(msgpool, M_MSG);
234 	free(msgmaps, M_MSG);
235 	free(msghdrs, M_MSG);
236 	free(msqids, M_MSG);
237 	mtx_destroy(&msq_mtx);
238 	return (0);
239 }
240 
241 
242 static int
243 sysvmsg_modload(struct module *module, int cmd, void *arg)
244 {
245 	int error = 0;
246 
247 	switch (cmd) {
248 	case MOD_LOAD:
249 		msginit();
250 		break;
251 	case MOD_UNLOAD:
252 		error = msgunload();
253 		break;
254 	case MOD_SHUTDOWN:
255 		break;
256 	default:
257 		error = EINVAL;
258 		break;
259 	}
260 	return (error);
261 }
262 
/*
 * Module descriptor: module name, event handler, and a third member left
 * NULL (presumably the handler's extra argument -- see moduledata_t).
 */
263 static moduledata_t sysvmsg_mod = {
264 	"sysvmsg",
265 	&sysvmsg_modload,
266 	NULL
267 };
268 
/*
 * One helper per system call implemented in this file.
 * NOTE(review): the macro is defined elsewhere; presumably it wires each
 * call into the syscall table when the module loads -- confirm.
 */
269 SYSCALL_MODULE_HELPER(msgsys);
270 SYSCALL_MODULE_HELPER(msgctl);
271 SYSCALL_MODULE_HELPER(msgget);
272 SYSCALL_MODULE_HELPER(msgsnd);
273 SYSCALL_MODULE_HELPER(msgrcv);
274 
/* Register the module at the SI_SUB_SYSV_MSG stage and give it version 1. */
275 DECLARE_MODULE(sysvmsg, sysvmsg_mod,
276 	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
277 MODULE_VERSION(sysvmsg, 1);
278 
279 /*
280  * Entry point for all MSG calls
281  *
282  * MPSAFE
283  */
284 int
285 msgsys(td, uap)
286 	struct thread *td;
287 	/* XXX actually varargs. */
288 	struct msgsys_args /* {
289 		int	which;
290 		int	a2;
291 		int	a3;
292 		int	a4;
293 		int	a5;
294 		int	a6;
295 	} */ *uap;
296 {
297 	int error;
298 
299 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
300 		return (ENOSYS);
301 	if (uap->which < 0 ||
302 	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
303 		return (EINVAL);
304 	error = (*msgcalls[uap->which])(td, &uap->a2);
305 	return (error);
306 }
307 
308 static void
309 msg_freehdr(msghdr)
310 	struct msg *msghdr;
311 {
312 	while (msghdr->msg_ts > 0) {
313 		short next;
314 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
315 			panic("msghdr->msg_spot out of range");
316 		next = msgmaps[msghdr->msg_spot].next;
317 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
318 		free_msgmaps = msghdr->msg_spot;
319 		nfree_msgmaps++;
320 		msghdr->msg_spot = next;
321 		if (msghdr->msg_ts >= msginfo.msgssz)
322 			msghdr->msg_ts -= msginfo.msgssz;
323 		else
324 			msghdr->msg_ts = 0;
325 	}
326 	if (msghdr->msg_spot != -1)
327 		panic("msghdr->msg_spot != -1");
328 	msghdr->msg_next = free_msghdrs;
329 	free_msghdrs = msghdr;
330 }
331 
332 #ifndef _SYS_SYSPROTO_H_
333 struct msgctl_args {
334 	int	msqid;
335 	int	cmd;
336 	struct	msqid_ds *buf;
337 };
338 #endif
339 
340 /*
341  * MPSAFE
342  */
343 int
344 msgctl(td, uap)
345 	struct thread *td;
346 	register struct msgctl_args *uap;
347 {
348 	int msqid = uap->msqid;
349 	int cmd = uap->cmd;
350 	struct msqid_ds *user_msqptr = uap->buf;
351 	int rval, error;
352 	struct msqid_ds msqbuf;
353 	register struct msqid_ds *msqptr;
354 
355 	DPRINTF(("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr));
356 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
357 		return (ENOSYS);
358 
359 	msqid = IPCID_TO_IX(msqid);
360 
361 	if (msqid < 0 || msqid >= msginfo.msgmni) {
362 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
363 		    msginfo.msgmni));
364 		return (EINVAL);
365 	}
366 	if (cmd == IPC_SET &&
367 	    (error = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
368 		return (error);
369 
370 	msqptr = &msqids[msqid];
371 
372 	mtx_lock(&msq_mtx);
373 	if (msqptr->msg_qbytes == 0) {
374 		DPRINTF(("no such msqid\n"));
375 		error = EINVAL;
376 		goto done2;
377 	}
378 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
379 		DPRINTF(("wrong sequence number\n"));
380 		error = EINVAL;
381 		goto done2;
382 	}
383 
384 	error = 0;
385 	rval = 0;
386 
387 	switch (cmd) {
388 
389 	case IPC_RMID:
390 	{
391 		struct msg *msghdr;
392 		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
393 			goto done2;
394 		/* Free the message headers */
395 		msghdr = msqptr->msg_first;
396 		while (msghdr != NULL) {
397 			struct msg *msghdr_tmp;
398 
399 			/* Free the segments of each message */
400 			msqptr->msg_cbytes -= msghdr->msg_ts;
401 			msqptr->msg_qnum--;
402 			msghdr_tmp = msghdr;
403 			msghdr = msghdr->msg_next;
404 			msg_freehdr(msghdr_tmp);
405 		}
406 
407 		if (msqptr->msg_cbytes != 0)
408 			panic("msg_cbytes is screwed up");
409 		if (msqptr->msg_qnum != 0)
410 			panic("msg_qnum is screwed up");
411 
412 		msqptr->msg_qbytes = 0;	/* Mark it as free */
413 
414 		wakeup(msqptr);
415 	}
416 
417 		break;
418 
419 	case IPC_SET:
420 		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
421 			goto done2;
422 		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
423 			error = suser(td);
424 			if (error)
425 				goto done2;
426 		}
427 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
428 			DPRINTF(("can't increase msg_qbytes beyond %d"
429 			    "(truncating)\n", msginfo.msgmnb));
430 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
431 		}
432 		if (msqbuf.msg_qbytes == 0) {
433 			DPRINTF(("can't reduce msg_qbytes to 0\n"));
434 			error = EINVAL;		/* non-standard errno! */
435 			goto done2;
436 		}
437 		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
438 		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
439 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
440 		    (msqbuf.msg_perm.mode & 0777);
441 		msqptr->msg_qbytes = msqbuf.msg_qbytes;
442 		msqptr->msg_ctime = time_second;
443 		break;
444 
445 	case IPC_STAT:
446 		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
447 			DPRINTF(("requester doesn't have read access\n"));
448 			goto done2;
449 		}
450 		break;
451 
452 	default:
453 		DPRINTF(("invalid command %d\n", cmd));
454 		error = EINVAL;
455 		goto done2;
456 	}
457 
458 	if (error == 0)
459 		td->td_retval[0] = rval;
460 done2:
461 	mtx_unlock(&msq_mtx);
462 	if (cmd == IPC_STAT && error == 0)
463 		error = copyout(msqptr, user_msqptr, sizeof(struct msqid_ds));
464 	return(error);
465 }
466 
467 #ifndef _SYS_SYSPROTO_H_
468 struct msgget_args {
469 	key_t	key;
470 	int	msgflg;
471 };
472 #endif
473 
474 /*
475  * MPSAFE
476  */
477 int
478 msgget(td, uap)
479 	struct thread *td;
480 	register struct msgget_args *uap;
481 {
482 	int msqid, error = 0;
483 	int key = uap->key;
484 	int msgflg = uap->msgflg;
485 	struct ucred *cred = td->td_ucred;
486 	register struct msqid_ds *msqptr = NULL;
487 
488 	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
489 
490 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
491 		return (ENOSYS);
492 
493 	mtx_lock(&msq_mtx);
494 	if (key != IPC_PRIVATE) {
495 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
496 			msqptr = &msqids[msqid];
497 			if (msqptr->msg_qbytes != 0 &&
498 			    msqptr->msg_perm.key == key)
499 				break;
500 		}
501 		if (msqid < msginfo.msgmni) {
502 			DPRINTF(("found public key\n"));
503 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
504 				DPRINTF(("not exclusive\n"));
505 				error = EEXIST;
506 				goto done2;
507 			}
508 			if ((error = ipcperm(td, &msqptr->msg_perm, msgflg & 0700))) {
509 				DPRINTF(("requester doesn't have 0%o access\n",
510 				    msgflg & 0700));
511 				goto done2;
512 			}
513 			goto found;
514 		}
515 	}
516 
517 	DPRINTF(("need to allocate the msqid_ds\n"));
518 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
519 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
520 			/*
521 			 * Look for an unallocated and unlocked msqid_ds.
522 			 * msqid_ds's can be locked by msgsnd or msgrcv while
523 			 * they are copying the message in/out.  We can't
524 			 * re-use the entry until they release it.
525 			 */
526 			msqptr = &msqids[msqid];
527 			if (msqptr->msg_qbytes == 0 &&
528 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
529 				break;
530 		}
531 		if (msqid == msginfo.msgmni) {
532 			DPRINTF(("no more msqid_ds's available\n"));
533 			error = ENOSPC;
534 			goto done2;
535 		}
536 		DPRINTF(("msqid %d is available\n", msqid));
537 		msqptr->msg_perm.key = key;
538 		msqptr->msg_perm.cuid = cred->cr_uid;
539 		msqptr->msg_perm.uid = cred->cr_uid;
540 		msqptr->msg_perm.cgid = cred->cr_gid;
541 		msqptr->msg_perm.gid = cred->cr_gid;
542 		msqptr->msg_perm.mode = (msgflg & 0777);
543 		/* Make sure that the returned msqid is unique */
544 		msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff;
545 		msqptr->msg_first = NULL;
546 		msqptr->msg_last = NULL;
547 		msqptr->msg_cbytes = 0;
548 		msqptr->msg_qnum = 0;
549 		msqptr->msg_qbytes = msginfo.msgmnb;
550 		msqptr->msg_lspid = 0;
551 		msqptr->msg_lrpid = 0;
552 		msqptr->msg_stime = 0;
553 		msqptr->msg_rtime = 0;
554 		msqptr->msg_ctime = time_second;
555 	} else {
556 		DPRINTF(("didn't find it and wasn't asked to create it\n"));
557 		error = ENOENT;
558 		goto done2;
559 	}
560 
561 found:
562 	/* Construct the unique msqid */
563 	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
564 done2:
565 	mtx_unlock(&msq_mtx);
566 	return (error);
567 }
568 
569 #ifndef _SYS_SYSPROTO_H_
570 struct msgsnd_args {
571 	int	msqid;
572 	const void	*msgp;
573 	size_t	msgsz;
574 	int	msgflg;
575 };
576 #endif
577 
578 /*
579  * MPSAFE
580  */
581 int
582 msgsnd(td, uap)
583 	struct thread *td;
584 	register struct msgsnd_args *uap;
585 {
586 	int msqid = uap->msqid;
587 	const void *user_msgp = uap->msgp;
588 	size_t msgsz = uap->msgsz;
589 	int msgflg = uap->msgflg;
590 	int segs_needed, error = 0;
591 	register struct msqid_ds *msqptr;
592 	register struct msg *msghdr;
593 	short next;
594 
595 	DPRINTF(("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
596 	    msgflg));
597 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
598 		return (ENOSYS);
599 
600 	mtx_lock(&msq_mtx);
601 	msqid = IPCID_TO_IX(msqid);
602 
603 	if (msqid < 0 || msqid >= msginfo.msgmni) {
604 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
605 		    msginfo.msgmni));
606 		error = EINVAL;
607 		goto done2;
608 	}
609 
610 	msqptr = &msqids[msqid];
611 	if (msqptr->msg_qbytes == 0) {
612 		DPRINTF(("no such message queue id\n"));
613 		error = EINVAL;
614 		goto done2;
615 	}
616 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
617 		DPRINTF(("wrong sequence number\n"));
618 		error = EINVAL;
619 		goto done2;
620 	}
621 
622 	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_W))) {
623 		DPRINTF(("requester doesn't have write access\n"));
624 		goto done2;
625 	}
626 
627 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
628 	DPRINTF(("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
629 	    segs_needed));
630 	for (;;) {
631 		int need_more_resources = 0;
632 
633 		/*
634 		 * check msgsz
635 		 * (inside this loop in case msg_qbytes changes while we sleep)
636 		 */
637 
638 		if (msgsz > msqptr->msg_qbytes) {
639 			DPRINTF(("msgsz > msqptr->msg_qbytes\n"));
640 			error = EINVAL;
641 			goto done2;
642 		}
643 
644 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
645 			DPRINTF(("msqid is locked\n"));
646 			need_more_resources = 1;
647 		}
648 		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
649 			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
650 			need_more_resources = 1;
651 		}
652 		if (segs_needed > nfree_msgmaps) {
653 			DPRINTF(("segs_needed > nfree_msgmaps\n"));
654 			need_more_resources = 1;
655 		}
656 		if (free_msghdrs == NULL) {
657 			DPRINTF(("no more msghdrs\n"));
658 			need_more_resources = 1;
659 		}
660 
661 		if (need_more_resources) {
662 			int we_own_it;
663 
664 			if ((msgflg & IPC_NOWAIT) != 0) {
665 				DPRINTF(("need more resources but caller "
666 				    "doesn't want to wait\n"));
667 				error = EAGAIN;
668 				goto done2;
669 			}
670 
671 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
672 				DPRINTF(("we don't own the msqid_ds\n"));
673 				we_own_it = 0;
674 			} else {
675 				/* Force later arrivals to wait for our
676 				   request */
677 				DPRINTF(("we own the msqid_ds\n"));
678 				msqptr->msg_perm.mode |= MSG_LOCKED;
679 				we_own_it = 1;
680 			}
681 			DPRINTF(("goodnight\n"));
682 			error = msleep(msqptr, &msq_mtx, (PZERO - 4) | PCATCH,
683 			    "msgwait", 0);
684 			DPRINTF(("good morning, error=%d\n", error));
685 			if (we_own_it)
686 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
687 			if (error != 0) {
688 				DPRINTF(("msgsnd:  interrupted system call\n"));
689 				error = EINTR;
690 				goto done2;
691 			}
692 
693 			/*
694 			 * Make sure that the msq queue still exists
695 			 */
696 
697 			if (msqptr->msg_qbytes == 0) {
698 				DPRINTF(("msqid deleted\n"));
699 				error = EIDRM;
700 				goto done2;
701 			}
702 
703 		} else {
704 			DPRINTF(("got all the resources that we need\n"));
705 			break;
706 		}
707 	}
708 
709 	/*
710 	 * We have the resources that we need.
711 	 * Make sure!
712 	 */
713 
714 	if (msqptr->msg_perm.mode & MSG_LOCKED)
715 		panic("msg_perm.mode & MSG_LOCKED");
716 	if (segs_needed > nfree_msgmaps)
717 		panic("segs_needed > nfree_msgmaps");
718 	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
719 		panic("msgsz + msg_cbytes > msg_qbytes");
720 	if (free_msghdrs == NULL)
721 		panic("no more msghdrs");
722 
723 	/*
724 	 * Re-lock the msqid_ds in case we page-fault when copying in the
725 	 * message
726 	 */
727 
728 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
729 		panic("msqid_ds is already locked");
730 	msqptr->msg_perm.mode |= MSG_LOCKED;
731 
732 	/*
733 	 * Allocate a message header
734 	 */
735 
736 	msghdr = free_msghdrs;
737 	free_msghdrs = msghdr->msg_next;
738 	msghdr->msg_spot = -1;
739 	msghdr->msg_ts = msgsz;
740 
741 	/*
742 	 * Allocate space for the message
743 	 */
744 
745 	while (segs_needed > 0) {
746 		if (nfree_msgmaps <= 0)
747 			panic("not enough msgmaps");
748 		if (free_msgmaps == -1)
749 			panic("nil free_msgmaps");
750 		next = free_msgmaps;
751 		if (next <= -1)
752 			panic("next too low #1");
753 		if (next >= msginfo.msgseg)
754 			panic("next out of range #1");
755 		DPRINTF(("allocating segment %d to message\n", next));
756 		free_msgmaps = msgmaps[next].next;
757 		nfree_msgmaps--;
758 		msgmaps[next].next = msghdr->msg_spot;
759 		msghdr->msg_spot = next;
760 		segs_needed--;
761 	}
762 
763 	/*
764 	 * Copy in the message type
765 	 */
766 
767 	mtx_unlock(&msq_mtx);
768 	if ((error = copyin(user_msgp, &msghdr->msg_type,
769 	    sizeof(msghdr->msg_type))) != 0) {
770 		mtx_lock(&msq_mtx);
771 		DPRINTF(("error %d copying the message type\n", error));
772 		msg_freehdr(msghdr);
773 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
774 		wakeup(msqptr);
775 		goto done2;
776 	}
777 	mtx_lock(&msq_mtx);
778 	user_msgp = (const char *)user_msgp + sizeof(msghdr->msg_type);
779 
780 	/*
781 	 * Validate the message type
782 	 */
783 
784 	if (msghdr->msg_type < 1) {
785 		msg_freehdr(msghdr);
786 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
787 		wakeup(msqptr);
788 		DPRINTF(("mtype (%d) < 1\n", msghdr->msg_type));
789 		error = EINVAL;
790 		goto done2;
791 	}
792 
793 	/*
794 	 * Copy in the message body
795 	 */
796 
797 	next = msghdr->msg_spot;
798 	while (msgsz > 0) {
799 		size_t tlen;
800 		if (msgsz > msginfo.msgssz)
801 			tlen = msginfo.msgssz;
802 		else
803 			tlen = msgsz;
804 		if (next <= -1)
805 			panic("next too low #2");
806 		if (next >= msginfo.msgseg)
807 			panic("next out of range #2");
808 		mtx_unlock(&msq_mtx);
809 		if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
810 		    tlen)) != 0) {
811 			mtx_lock(&msq_mtx);
812 			DPRINTF(("error %d copying in message segment\n",
813 			    error));
814 			msg_freehdr(msghdr);
815 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
816 			wakeup(msqptr);
817 			goto done2;
818 		}
819 		mtx_lock(&msq_mtx);
820 		msgsz -= tlen;
821 		user_msgp = (const char *)user_msgp + tlen;
822 		next = msgmaps[next].next;
823 	}
824 	if (next != -1)
825 		panic("didn't use all the msg segments");
826 
827 	/*
828 	 * We've got the message.  Unlock the msqid_ds.
829 	 */
830 
831 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
832 
833 	/*
834 	 * Make sure that the msqid_ds is still allocated.
835 	 */
836 
837 	if (msqptr->msg_qbytes == 0) {
838 		msg_freehdr(msghdr);
839 		wakeup(msqptr);
840 		error = EIDRM;
841 		goto done2;
842 	}
843 
844 	/*
845 	 * Put the message into the queue
846 	 */
847 
848 	if (msqptr->msg_first == NULL) {
849 		msqptr->msg_first = msghdr;
850 		msqptr->msg_last = msghdr;
851 	} else {
852 		msqptr->msg_last->msg_next = msghdr;
853 		msqptr->msg_last = msghdr;
854 	}
855 	msqptr->msg_last->msg_next = NULL;
856 
857 	msqptr->msg_cbytes += msghdr->msg_ts;
858 	msqptr->msg_qnum++;
859 	msqptr->msg_lspid = td->td_proc->p_pid;
860 	msqptr->msg_stime = time_second;
861 
862 	wakeup(msqptr);
863 	td->td_retval[0] = 0;
864 done2:
865 	mtx_unlock(&msq_mtx);
866 	return (error);
867 }
868 
869 #ifndef _SYS_SYSPROTO_H_
870 struct msgrcv_args {
871 	int	msqid;
872 	void	*msgp;
873 	size_t	msgsz;
874 	long	msgtyp;
875 	int	msgflg;
876 };
877 #endif
878 
879 /*
880  * MPSAFE
881  */
882 int
883 msgrcv(td, uap)
884 	struct thread *td;
885 	register struct msgrcv_args *uap;
886 {
887 	int msqid = uap->msqid;
888 	void *user_msgp = uap->msgp;
889 	size_t msgsz = uap->msgsz;
890 	long msgtyp = uap->msgtyp;
891 	int msgflg = uap->msgflg;
892 	size_t len;
893 	register struct msqid_ds *msqptr;
894 	register struct msg *msghdr;
895 	int error = 0;
896 	short next;
897 
898 	DPRINTF(("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
899 	    msgsz, msgtyp, msgflg));
900 
901 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
902 		return (ENOSYS);
903 
904 	msqid = IPCID_TO_IX(msqid);
905 
906 	if (msqid < 0 || msqid >= msginfo.msgmni) {
907 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
908 		    msginfo.msgmni));
909 		return (EINVAL);
910 	}
911 
912 	msqptr = &msqids[msqid];
913 	mtx_lock(&msq_mtx);
914 	if (msqptr->msg_qbytes == 0) {
915 		DPRINTF(("no such message queue id\n"));
916 		error = EINVAL;
917 		goto done2;
918 	}
919 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
920 		DPRINTF(("wrong sequence number\n"));
921 		error = EINVAL;
922 		goto done2;
923 	}
924 
925 	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
926 		DPRINTF(("requester doesn't have read access\n"));
927 		goto done2;
928 	}
929 
930 	msghdr = NULL;
931 	while (msghdr == NULL) {
932 		if (msgtyp == 0) {
933 			msghdr = msqptr->msg_first;
934 			if (msghdr != NULL) {
935 				if (msgsz < msghdr->msg_ts &&
936 				    (msgflg & MSG_NOERROR) == 0) {
937 					DPRINTF(("first message on the queue "
938 					    "is too big (want %d, got %d)\n",
939 					    msgsz, msghdr->msg_ts));
940 					error = E2BIG;
941 					goto done2;
942 				}
943 				if (msqptr->msg_first == msqptr->msg_last) {
944 					msqptr->msg_first = NULL;
945 					msqptr->msg_last = NULL;
946 				} else {
947 					msqptr->msg_first = msghdr->msg_next;
948 					if (msqptr->msg_first == NULL)
949 						panic("msg_first/last screwed up #1");
950 				}
951 			}
952 		} else {
953 			struct msg *previous;
954 			struct msg **prev;
955 
956 			previous = NULL;
957 			prev = &(msqptr->msg_first);
958 			while ((msghdr = *prev) != NULL) {
959 				/*
960 				 * Is this message's type an exact match or is
961 				 * this message's type less than or equal to
962 				 * the absolute value of a negative msgtyp?
963 				 * Note that the second half of this test can
964 				 * NEVER be true if msgtyp is positive since
965 				 * msg_type is always positive!
966 				 */
967 
968 				if (msgtyp == msghdr->msg_type ||
969 				    msghdr->msg_type <= -msgtyp) {
970 					DPRINTF(("found message type %d, "
971 					    "requested %d\n",
972 					    msghdr->msg_type, msgtyp));
973 					if (msgsz < msghdr->msg_ts &&
974 					    (msgflg & MSG_NOERROR) == 0) {
975 						DPRINTF(("requested message "
976 						    "on the queue is too big "
977 						    "(want %d, got %d)\n",
978 						    msgsz, msghdr->msg_ts));
979 						error = E2BIG;
980 						goto done2;
981 					}
982 					*prev = msghdr->msg_next;
983 					if (msghdr == msqptr->msg_last) {
984 						if (previous == NULL) {
985 							if (prev !=
986 							    &msqptr->msg_first)
987 								panic("msg_first/last screwed up #2");
988 							msqptr->msg_first =
989 							    NULL;
990 							msqptr->msg_last =
991 							    NULL;
992 						} else {
993 							if (prev ==
994 							    &msqptr->msg_first)
995 								panic("msg_first/last screwed up #3");
996 							msqptr->msg_last =
997 							    previous;
998 						}
999 					}
1000 					break;
1001 				}
1002 				previous = msghdr;
1003 				prev = &(msghdr->msg_next);
1004 			}
1005 		}
1006 
1007 		/*
1008 		 * We've either extracted the msghdr for the appropriate
1009 		 * message or there isn't one.
1010 		 * If there is one then bail out of this loop.
1011 		 */
1012 
1013 		if (msghdr != NULL)
1014 			break;
1015 
1016 		/*
1017 		 * Hmph!  No message found.  Does the user want to wait?
1018 		 */
1019 
1020 		if ((msgflg & IPC_NOWAIT) != 0) {
1021 			DPRINTF(("no appropriate message found (msgtyp=%d)\n",
1022 			    msgtyp));
1023 			/* The SVID says to return ENOMSG. */
1024 			error = ENOMSG;
1025 			goto done2;
1026 		}
1027 
1028 		/*
1029 		 * Wait for something to happen
1030 		 */
1031 
1032 		DPRINTF(("msgrcv:  goodnight\n"));
1033 		error = msleep(msqptr, &msq_mtx, (PZERO - 4) | PCATCH,
1034 		    "msgwait", 0);
1035 		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1036 
1037 		if (error != 0) {
1038 			DPRINTF(("msgsnd:  interrupted system call\n"));
1039 			error = EINTR;
1040 			goto done2;
1041 		}
1042 
1043 		/*
1044 		 * Make sure that the msq queue still exists
1045 		 */
1046 
1047 		if (msqptr->msg_qbytes == 0 ||
1048 		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1049 			DPRINTF(("msqid deleted\n"));
1050 			error = EIDRM;
1051 			goto done2;
1052 		}
1053 	}
1054 
1055 	/*
1056 	 * Return the message to the user.
1057 	 *
1058 	 * First, do the bookkeeping (before we risk being interrupted).
1059 	 */
1060 
1061 	msqptr->msg_cbytes -= msghdr->msg_ts;
1062 	msqptr->msg_qnum--;
1063 	msqptr->msg_lrpid = td->td_proc->p_pid;
1064 	msqptr->msg_rtime = time_second;
1065 
1066 	/*
1067 	 * Make msgsz the actual amount that we'll be returning.
1068 	 * Note that this effectively truncates the message if it is too long
1069 	 * (since msgsz is never increased).
1070 	 */
1071 
1072 	DPRINTF(("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1073 	    msghdr->msg_ts));
1074 	if (msgsz > msghdr->msg_ts)
1075 		msgsz = msghdr->msg_ts;
1076 
1077 	/*
1078 	 * Return the type to the user.
1079 	 */
1080 
1081 	mtx_unlock(&msq_mtx);
1082 	error = copyout(&(msghdr->msg_type), user_msgp,
1083 	    sizeof(msghdr->msg_type));
1084 	mtx_lock(&msq_mtx);
1085 	if (error != 0) {
1086 		DPRINTF(("error (%d) copying out message type\n", error));
1087 		msg_freehdr(msghdr);
1088 		wakeup(msqptr);
1089 		goto done2;
1090 	}
1091 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1092 
1093 	/*
1094 	 * Return the segments to the user
1095 	 */
1096 
1097 	next = msghdr->msg_spot;
1098 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1099 		size_t tlen;
1100 
1101 		if (msgsz - len > msginfo.msgssz)
1102 			tlen = msginfo.msgssz;
1103 		else
1104 			tlen = msgsz - len;
1105 		if (next <= -1)
1106 			panic("next too low #3");
1107 		if (next >= msginfo.msgseg)
1108 			panic("next out of range #3");
1109 		mtx_unlock(&msq_mtx);
1110 		error = copyout(&msgpool[next * msginfo.msgssz],
1111 		    user_msgp, tlen);
1112 		mtx_lock(&msq_mtx);
1113 		if (error != 0) {
1114 			DPRINTF(("error (%d) copying out message segment\n",
1115 			    error));
1116 			msg_freehdr(msghdr);
1117 			wakeup(msqptr);
1118 			goto done2;
1119 		}
1120 		user_msgp = (char *)user_msgp + tlen;
1121 		next = msgmaps[next].next;
1122 	}
1123 
1124 	/*
1125 	 * Done, return the actual number of bytes copied out.
1126 	 */
1127 
1128 	msg_freehdr(msghdr);
1129 	wakeup(msqptr);
1130 	td->td_retval[0] = msgsz;
1131 done2:
1132 	mtx_unlock(&msq_mtx);
1133 	return (error);
1134 }
1135 
1136 static int
1137 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1138 {
1139 
1140 	return (SYSCTL_OUT(req, msqids,
1141 	    sizeof(struct msqid_ds) * msginfo.msgmni));
1142 }
1143 
/*
 * kern.ipc.* sysctl nodes exposing the limits in msginfo; all are read-only.
 * The three marked CTLFLAG_RDTUN (msgmni, msgssz, msgseg) are the ones that
 * msginit() fetches as tunables.  kern.ipc.msqids returns the raw queue
 * descriptor table via sysctl_msqids().
 */
1144 SYSCTL_DECL(_kern_ipc);
1145 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
1146 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0, "");
1147 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
1148 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
1149 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0, "");
1150 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0, "");
1151 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1152     NULL, 0, sysctl_msqids, "", "Message queue IDs");
1153