xref: /freebsd/sys/kern/sysv_msg.c (revision 63f9a4cb2684a303e3eb2ffed39c03a2e2b28ae0)
1 /*
2  * Implementation of SVID messages
3  *
4  * Author:  Daniel Boulet
5  *
6  * Copyright 1993 Daniel Boulet and RTMX Inc.
7  *
8  * This system call was implemented by Daniel Boulet under contract from RTMX.
9  *
10  * Redistribution and use in source forms, with and without modification,
11  * are permitted provided that this entire comment appears intact.
12  *
13  * Redistribution in binary form may occur without any restrictions.
14  * Obviously, it would be nice if you gave credit where credit is due
15  * but requiring it would be too onerous.
16  *
17  * This software is provided ``AS IS'' without any warranties of any kind.
18  */
19 
20 #include <sys/cdefs.h>
21 __FBSDID("$FreeBSD$");
22 
23 #include "opt_sysvipc.h"
24 
25 #include <sys/param.h>
26 #include <sys/systm.h>
27 #include <sys/sysproto.h>
28 #include <sys/kernel.h>
29 #include <sys/proc.h>
30 #include <sys/lock.h>
31 #include <sys/mutex.h>
32 #include <sys/module.h>
33 #include <sys/msg.h>
34 #include <sys/syscall.h>
35 #include <sys/sysent.h>
36 #include <sys/sysctl.h>
37 #include <sys/malloc.h>
38 #include <sys/jail.h>
39 
40 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
41 
42 static void msginit(void);
43 static int msgunload(void);
44 static int sysvmsg_modload(struct module *, int, void *);
45 
46 #ifdef MSG_DEBUG
47 #define DPRINTF(a)	printf a
48 #else
49 #define DPRINTF(a)
50 #endif
51 
52 static void msg_freehdr(struct msg *msghdr);
53 
54 /* XXX casting to (sy_call_t *) is bogus, as usual. */
55 static sy_call_t *msgcalls[] = {
56 	(sy_call_t *)msgctl, (sy_call_t *)msgget,
57 	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
58 };
59 
/*
 * Compile-time defaults for the message limits.  Every limit except
 * MSGMAX is only defined here when it has not already been defined
 * earlier in the translation unit.
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#ifndef MSGSEG
#define MSGSEG	2048		/* must not exceed 32767 (see msginit) */
#endif
/*
 * Largest message that fits in the segment pool.  The operands are
 * parenthesized because an externally supplied MSGSSZ or MSGSEG may be an
 * arbitrary expression rather than a plain constant.
 */
#define MSGMAX	((MSGSSZ) * (MSGSEG))
#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#ifndef MSGTQL
#define MSGTQL	40		/* max messages in system */
#endif
76 
77 /*
78  * Based on the configuration parameters described in an SVR2 (yes, two)
79  * config(1m) man page.
80  *
81  * Each message is broken up and stored in segments that are msgssz bytes
82  * long.  For efficiency reasons, this should be a power of two.  Also,
83  * it doesn't make sense if it is less than 8 or greater than about 256.
84  * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
85  * two between 8 and 1024 inclusive (and panic's if it isn't).
86  */
87 struct msginfo msginfo = {
88                 MSGMAX,         /* max chars in a message */
89                 MSGMNI,         /* # of message queue identifiers */
90                 MSGMNB,         /* max chars in a queue */
91                 MSGTQL,         /* max messages in system */
92                 MSGSSZ,         /* size of a message segment */
93                 		/* (must be small power of 2 greater than 4) */
94                 MSGSEG          /* number of message segments */
95 };
96 
/*
 * Macros to convert between msqid_ds's and msqid's
 * (specific to this implementation).
 *
 * A msqid carries the slot index in its low 16 bits and the slot's
 * sequence number in its high 16 bits.  MSQID() converts the sequence
 * number to unsigned before shifting so that a value with bit 15 set is
 * never shifted into the sign bit of an int, and every sub-expression is
 * parenthesized explicitly instead of relying on the relative precedence
 * of '&' and '|'.
 */
#define MSQID(ix,ds)	\
	(((ix) & 0xffff) | ((((unsigned)(ds).msg_perm.seq) << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
104 
105 /*
106  * The rest of this file is specific to this particular implementation.
107  */
108 
109 struct msgmap {
110 	short	next;		/* next segment in buffer */
111     				/* -1 -> available */
112     				/* 0..(MSGSEG-1) -> index of next segment */
113 };
114 
115 #define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
116 
117 static int nfree_msgmaps;	/* # of free map entries */
118 static short free_msgmaps;	/* head of linked list of free map entries */
119 static struct msg *free_msghdrs;/* list of free msg headers */
120 static char *msgpool;		/* MSGMAX byte long msg buffer pool */
121 static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
122 static struct msg *msghdrs;	/* MSGTQL msg headers */
123 static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
124 static struct mtx msq_mtx;	/* global mutex for message queues. */
125 
126 static void
127 msginit()
128 {
129 	register int i;
130 
131 	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
132 	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
133 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
134 	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
135 
136 	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
137 	if (msgpool == NULL)
138 		panic("msgpool is NULL");
139 	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
140 	if (msgmaps == NULL)
141 		panic("msgmaps is NULL");
142 	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
143 	if (msghdrs == NULL)
144 		panic("msghdrs is NULL");
145 	msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG,
146 	    M_WAITOK);
147 	if (msqids == NULL)
148 		panic("msqids is NULL");
149 
150 	/*
151 	 * msginfo.msgssz should be a power of two for efficiency reasons.
152 	 * It is also pretty silly if msginfo.msgssz is less than 8
153 	 * or greater than about 256 so ...
154 	 */
155 
156 	i = 8;
157 	while (i < 1024 && i != msginfo.msgssz)
158 		i <<= 1;
159     	if (i != msginfo.msgssz) {
160 		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
161 		    msginfo.msgssz));
162 		panic("msginfo.msgssz not a small power of 2");
163 	}
164 
165 	if (msginfo.msgseg > 32767) {
166 		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
167 		panic("msginfo.msgseg > 32767");
168 	}
169 
170 	if (msgmaps == NULL)
171 		panic("msgmaps is NULL");
172 
173 	for (i = 0; i < msginfo.msgseg; i++) {
174 		if (i > 0)
175 			msgmaps[i-1].next = i;
176 		msgmaps[i].next = -1;	/* implies entry is available */
177 	}
178 	free_msgmaps = 0;
179 	nfree_msgmaps = msginfo.msgseg;
180 
181 	if (msghdrs == NULL)
182 		panic("msghdrs is NULL");
183 
184 	for (i = 0; i < msginfo.msgtql; i++) {
185 		msghdrs[i].msg_type = 0;
186 		if (i > 0)
187 			msghdrs[i-1].msg_next = &msghdrs[i];
188 		msghdrs[i].msg_next = NULL;
189     	}
190 	free_msghdrs = &msghdrs[0];
191 
192 	if (msqids == NULL)
193 		panic("msqids is NULL");
194 
195 	for (i = 0; i < msginfo.msgmni; i++) {
196 		msqids[i].u.msg_qbytes = 0;	/* implies entry is available */
197 		msqids[i].u.msg_perm.seq = 0;	/* reset to a known value */
198 		msqids[i].u.msg_perm.mode = 0;
199 	}
200 	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
201 }
202 
203 static int
204 msgunload()
205 {
206 	struct msqid_kernel *msqkptr;
207 	int msqid;
208 
209 	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
210 		/*
211 		 * Look for an unallocated and unlocked msqid_ds.
212 		 * msqid_ds's can be locked by msgsnd or msgrcv while
213 		 * they are copying the message in/out.  We can't
214 		 * re-use the entry until they release it.
215 		 */
216 		msqkptr = &msqids[msqid];
217 		if (msqkptr->u.msg_qbytes != 0 ||
218 		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
219 			break;
220 	}
221 	if (msqid != msginfo.msgmni)
222 		return (EBUSY);
223 
224 	free(msgpool, M_MSG);
225 	free(msgmaps, M_MSG);
226 	free(msghdrs, M_MSG);
227 	free(msqids, M_MSG);
228 	mtx_destroy(&msq_mtx);
229 	return (0);
230 }
231 
232 
233 static int
234 sysvmsg_modload(struct module *module, int cmd, void *arg)
235 {
236 	int error = 0;
237 
238 	switch (cmd) {
239 	case MOD_LOAD:
240 		msginit();
241 		break;
242 	case MOD_UNLOAD:
243 		error = msgunload();
244 		break;
245 	case MOD_SHUTDOWN:
246 		break;
247 	default:
248 		error = EINVAL;
249 		break;
250 	}
251 	return (error);
252 }
253 
254 static moduledata_t sysvmsg_mod = {
255 	"sysvmsg",
256 	&sysvmsg_modload,
257 	NULL
258 };
259 
260 SYSCALL_MODULE_HELPER(msgsys);
261 SYSCALL_MODULE_HELPER(msgctl);
262 SYSCALL_MODULE_HELPER(msgget);
263 SYSCALL_MODULE_HELPER(msgsnd);
264 SYSCALL_MODULE_HELPER(msgrcv);
265 
266 DECLARE_MODULE(sysvmsg, sysvmsg_mod,
267 	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
268 MODULE_VERSION(sysvmsg, 1);
269 
270 /*
271  * Entry point for all MSG calls
272  *
273  * MPSAFE
274  */
275 int
276 msgsys(td, uap)
277 	struct thread *td;
278 	/* XXX actually varargs. */
279 	struct msgsys_args /* {
280 		int	which;
281 		int	a2;
282 		int	a3;
283 		int	a4;
284 		int	a5;
285 		int	a6;
286 	} */ *uap;
287 {
288 	int error;
289 
290 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
291 		return (ENOSYS);
292 	if (uap->which < 0 ||
293 	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
294 		return (EINVAL);
295 	error = (*msgcalls[uap->which])(td, &uap->a2);
296 	return (error);
297 }
298 
299 static void
300 msg_freehdr(msghdr)
301 	struct msg *msghdr;
302 {
303 	while (msghdr->msg_ts > 0) {
304 		short next;
305 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
306 			panic("msghdr->msg_spot out of range");
307 		next = msgmaps[msghdr->msg_spot].next;
308 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
309 		free_msgmaps = msghdr->msg_spot;
310 		nfree_msgmaps++;
311 		msghdr->msg_spot = next;
312 		if (msghdr->msg_ts >= msginfo.msgssz)
313 			msghdr->msg_ts -= msginfo.msgssz;
314 		else
315 			msghdr->msg_ts = 0;
316 	}
317 	if (msghdr->msg_spot != -1)
318 		panic("msghdr->msg_spot != -1");
319 	msghdr->msg_next = free_msghdrs;
320 	free_msghdrs = msghdr;
321 }
322 
323 #ifndef _SYS_SYSPROTO_H_
324 struct msgctl_args {
325 	int	msqid;
326 	int	cmd;
327 	struct	msqid_ds *buf;
328 };
329 #endif
330 
331 /*
332  * MPSAFE
333  */
334 int
335 msgctl(td, uap)
336 	struct thread *td;
337 	register struct msgctl_args *uap;
338 {
339 	int msqid = uap->msqid;
340 	int cmd = uap->cmd;
341 	struct msqid_ds *user_msqptr = uap->buf;
342 	int rval, error;
343 	struct msqid_ds msqbuf;
344 	register struct msqid_kernel *msqkptr;
345 
346 	DPRINTF(("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr));
347 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
348 		return (ENOSYS);
349 
350 	msqid = IPCID_TO_IX(msqid);
351 
352 	if (msqid < 0 || msqid >= msginfo.msgmni) {
353 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
354 		    msginfo.msgmni));
355 		return (EINVAL);
356 	}
357 	if (cmd == IPC_SET &&
358 	    (error = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
359 		return (error);
360 
361 	msqkptr = &msqids[msqid];
362 
363 	mtx_lock(&msq_mtx);
364 	if (msqkptr->u.msg_qbytes == 0) {
365 		DPRINTF(("no such msqid\n"));
366 		error = EINVAL;
367 		goto done2;
368 	}
369 	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
370 		DPRINTF(("wrong sequence number\n"));
371 		error = EINVAL;
372 		goto done2;
373 	}
374 
375 	error = 0;
376 	rval = 0;
377 
378 	switch (cmd) {
379 
380 	case IPC_RMID:
381 	{
382 		struct msg *msghdr;
383 		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
384 			goto done2;
385 
386 		/* Free the message headers */
387 		msghdr = msqkptr->u.msg_first;
388 		while (msghdr != NULL) {
389 			struct msg *msghdr_tmp;
390 
391 			/* Free the segments of each message */
392 			msqkptr->u.msg_cbytes -= msghdr->msg_ts;
393 			msqkptr->u.msg_qnum--;
394 			msghdr_tmp = msghdr;
395 			msghdr = msghdr->msg_next;
396 			msg_freehdr(msghdr_tmp);
397 		}
398 
399 		if (msqkptr->u.msg_cbytes != 0)
400 			panic("msg_cbytes is screwed up");
401 		if (msqkptr->u.msg_qnum != 0)
402 			panic("msg_qnum is screwed up");
403 
404 		msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
405 
406 		wakeup(msqkptr);
407 	}
408 
409 		break;
410 
411 	case IPC_SET:
412 		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
413 			goto done2;
414 		if (msqbuf.msg_qbytes > msqkptr->u.msg_qbytes) {
415 			error = suser(td);
416 			if (error)
417 				goto done2;
418 		}
419 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
420 			DPRINTF(("can't increase msg_qbytes beyond %d"
421 			    "(truncating)\n", msginfo.msgmnb));
422 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
423 		}
424 		if (msqbuf.msg_qbytes == 0) {
425 			DPRINTF(("can't reduce msg_qbytes to 0\n"));
426 			error = EINVAL;		/* non-standard errno! */
427 			goto done2;
428 		}
429 		msqkptr->u.msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
430 		msqkptr->u.msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
431 		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
432 		    (msqbuf.msg_perm.mode & 0777);
433 		msqkptr->u.msg_qbytes = msqbuf.msg_qbytes;
434 		msqkptr->u.msg_ctime = time_second;
435 		break;
436 
437 	case IPC_STAT:
438 		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
439 			DPRINTF(("requester doesn't have read access\n"));
440 			goto done2;
441 		}
442 		break;
443 
444 	default:
445 		DPRINTF(("invalid command %d\n", cmd));
446 		error = EINVAL;
447 		goto done2;
448 	}
449 
450 	if (error == 0)
451 		td->td_retval[0] = rval;
452 done2:
453 	mtx_unlock(&msq_mtx);
454 	if (cmd == IPC_STAT && error == 0)
455 		error = copyout(&(msqkptr->u), user_msqptr, sizeof(struct msqid_ds));
456 	return(error);
457 }
458 
459 #ifndef _SYS_SYSPROTO_H_
460 struct msgget_args {
461 	key_t	key;
462 	int	msgflg;
463 };
464 #endif
465 
466 /*
467  * MPSAFE
468  */
469 int
470 msgget(td, uap)
471 	struct thread *td;
472 	register struct msgget_args *uap;
473 {
474 	int msqid, error = 0;
475 	int key = uap->key;
476 	int msgflg = uap->msgflg;
477 	struct ucred *cred = td->td_ucred;
478 	register struct msqid_kernel *msqkptr = NULL;
479 
480 	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
481 
482 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
483 		return (ENOSYS);
484 
485 	mtx_lock(&msq_mtx);
486 	if (key != IPC_PRIVATE) {
487 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
488 			msqkptr = &msqids[msqid];
489 			if (msqkptr->u.msg_qbytes != 0 &&
490 			    msqkptr->u.msg_perm.key == key)
491 				break;
492 		}
493 		if (msqid < msginfo.msgmni) {
494 			DPRINTF(("found public key\n"));
495 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
496 				DPRINTF(("not exclusive\n"));
497 				error = EEXIST;
498 				goto done2;
499 			}
500 			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
501 			    msgflg & 0700))) {
502 				DPRINTF(("requester doesn't have 0%o access\n",
503 				    msgflg & 0700));
504 				goto done2;
505 			}
506 			goto found;
507 		}
508 	}
509 
510 	DPRINTF(("need to allocate the msqid_ds\n"));
511 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
512 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
513 			/*
514 			 * Look for an unallocated and unlocked msqid_ds.
515 			 * msqid_ds's can be locked by msgsnd or msgrcv while
516 			 * they are copying the message in/out.  We can't
517 			 * re-use the entry until they release it.
518 			 */
519 			msqkptr = &msqids[msqid];
520 			if (msqkptr->u.msg_qbytes == 0 &&
521 			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
522 				break;
523 		}
524 		if (msqid == msginfo.msgmni) {
525 			DPRINTF(("no more msqid_ds's available\n"));
526 			error = ENOSPC;
527 			goto done2;
528 		}
529 		DPRINTF(("msqid %d is available\n", msqid));
530 		msqkptr->u.msg_perm.key = key;
531 		msqkptr->u.msg_perm.cuid = cred->cr_uid;
532 		msqkptr->u.msg_perm.uid = cred->cr_uid;
533 		msqkptr->u.msg_perm.cgid = cred->cr_gid;
534 		msqkptr->u.msg_perm.gid = cred->cr_gid;
535 		msqkptr->u.msg_perm.mode = (msgflg & 0777);
536 		/* Make sure that the returned msqid is unique */
537 		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
538 		msqkptr->u.msg_first = NULL;
539 		msqkptr->u.msg_last = NULL;
540 		msqkptr->u.msg_cbytes = 0;
541 		msqkptr->u.msg_qnum = 0;
542 		msqkptr->u.msg_qbytes = msginfo.msgmnb;
543 		msqkptr->u.msg_lspid = 0;
544 		msqkptr->u.msg_lrpid = 0;
545 		msqkptr->u.msg_stime = 0;
546 		msqkptr->u.msg_rtime = 0;
547 		msqkptr->u.msg_ctime = time_second;
548 	} else {
549 		DPRINTF(("didn't find it and wasn't asked to create it\n"));
550 		error = ENOENT;
551 		goto done2;
552 	}
553 
554 found:
555 	/* Construct the unique msqid */
556 	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
557 done2:
558 	mtx_unlock(&msq_mtx);
559 	return (error);
560 }
561 
562 #ifndef _SYS_SYSPROTO_H_
563 struct msgsnd_args {
564 	int	msqid;
565 	const void	*msgp;
566 	size_t	msgsz;
567 	int	msgflg;
568 };
569 #endif
570 
571 /*
572  * MPSAFE
573  */
574 int
575 msgsnd(td, uap)
576 	struct thread *td;
577 	register struct msgsnd_args *uap;
578 {
579 	int msqid = uap->msqid;
580 	const void *user_msgp = uap->msgp;
581 	size_t msgsz = uap->msgsz;
582 	int msgflg = uap->msgflg;
583 	int segs_needed, error = 0;
584 	register struct msqid_kernel *msqkptr;
585 	register struct msg *msghdr;
586 	short next;
587 
588 	DPRINTF(("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
589 	    msgflg));
590 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
591 		return (ENOSYS);
592 
593 	mtx_lock(&msq_mtx);
594 	msqid = IPCID_TO_IX(msqid);
595 
596 	if (msqid < 0 || msqid >= msginfo.msgmni) {
597 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
598 		    msginfo.msgmni));
599 		error = EINVAL;
600 		goto done2;
601 	}
602 
603 	msqkptr = &msqids[msqid];
604 	if (msqkptr->u.msg_qbytes == 0) {
605 		DPRINTF(("no such message queue id\n"));
606 		error = EINVAL;
607 		goto done2;
608 	}
609 	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
610 		DPRINTF(("wrong sequence number\n"));
611 		error = EINVAL;
612 		goto done2;
613 	}
614 
615 	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
616 		DPRINTF(("requester doesn't have write access\n"));
617 		goto done2;
618 	}
619 
620 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
621 	DPRINTF(("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
622 	    segs_needed));
623 	for (;;) {
624 		int need_more_resources = 0;
625 
626 		/*
627 		 * check msgsz
628 		 * (inside this loop in case msg_qbytes changes while we sleep)
629 		 */
630 
631 		if (msgsz > msqkptr->u.msg_qbytes) {
632 			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
633 			error = EINVAL;
634 			goto done2;
635 		}
636 
637 		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
638 			DPRINTF(("msqid is locked\n"));
639 			need_more_resources = 1;
640 		}
641 		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
642 			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
643 			need_more_resources = 1;
644 		}
645 		if (segs_needed > nfree_msgmaps) {
646 			DPRINTF(("segs_needed > nfree_msgmaps\n"));
647 			need_more_resources = 1;
648 		}
649 		if (free_msghdrs == NULL) {
650 			DPRINTF(("no more msghdrs\n"));
651 			need_more_resources = 1;
652 		}
653 
654 		if (need_more_resources) {
655 			int we_own_it;
656 
657 			if ((msgflg & IPC_NOWAIT) != 0) {
658 				DPRINTF(("need more resources but caller "
659 				    "doesn't want to wait\n"));
660 				error = EAGAIN;
661 				goto done2;
662 			}
663 
664 			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
665 				DPRINTF(("we don't own the msqid_ds\n"));
666 				we_own_it = 0;
667 			} else {
668 				/* Force later arrivals to wait for our
669 				   request */
670 				DPRINTF(("we own the msqid_ds\n"));
671 				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
672 				we_own_it = 1;
673 			}
674 			DPRINTF(("goodnight\n"));
675 			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
676 			    "msgwait", 0);
677 			DPRINTF(("good morning, error=%d\n", error));
678 			if (we_own_it)
679 				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
680 			if (error != 0) {
681 				DPRINTF(("msgsnd:  interrupted system call\n"));
682 				error = EINTR;
683 				goto done2;
684 			}
685 
686 			/*
687 			 * Make sure that the msq queue still exists
688 			 */
689 
690 			if (msqkptr->u.msg_qbytes == 0) {
691 				DPRINTF(("msqid deleted\n"));
692 				error = EIDRM;
693 				goto done2;
694 			}
695 
696 		} else {
697 			DPRINTF(("got all the resources that we need\n"));
698 			break;
699 		}
700 	}
701 
702 	/*
703 	 * We have the resources that we need.
704 	 * Make sure!
705 	 */
706 
707 	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
708 		panic("msg_perm.mode & MSG_LOCKED");
709 	if (segs_needed > nfree_msgmaps)
710 		panic("segs_needed > nfree_msgmaps");
711 	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
712 		panic("msgsz + msg_cbytes > msg_qbytes");
713 	if (free_msghdrs == NULL)
714 		panic("no more msghdrs");
715 
716 	/*
717 	 * Re-lock the msqid_ds in case we page-fault when copying in the
718 	 * message
719 	 */
720 
721 	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
722 		panic("msqid_ds is already locked");
723 	msqkptr->u.msg_perm.mode |= MSG_LOCKED;
724 
725 	/*
726 	 * Allocate a message header
727 	 */
728 
729 	msghdr = free_msghdrs;
730 	free_msghdrs = msghdr->msg_next;
731 	msghdr->msg_spot = -1;
732 	msghdr->msg_ts = msgsz;
733 
734 	/*
735 	 * Allocate space for the message
736 	 */
737 
738 	while (segs_needed > 0) {
739 		if (nfree_msgmaps <= 0)
740 			panic("not enough msgmaps");
741 		if (free_msgmaps == -1)
742 			panic("nil free_msgmaps");
743 		next = free_msgmaps;
744 		if (next <= -1)
745 			panic("next too low #1");
746 		if (next >= msginfo.msgseg)
747 			panic("next out of range #1");
748 		DPRINTF(("allocating segment %d to message\n", next));
749 		free_msgmaps = msgmaps[next].next;
750 		nfree_msgmaps--;
751 		msgmaps[next].next = msghdr->msg_spot;
752 		msghdr->msg_spot = next;
753 		segs_needed--;
754 	}
755 
756 	/*
757 	 * Copy in the message type
758 	 */
759 
760 	mtx_unlock(&msq_mtx);
761 	if ((error = copyin(user_msgp, &msghdr->msg_type,
762 	    sizeof(msghdr->msg_type))) != 0) {
763 		mtx_lock(&msq_mtx);
764 		DPRINTF(("error %d copying the message type\n", error));
765 		msg_freehdr(msghdr);
766 		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
767 		wakeup(msqkptr);
768 		goto done2;
769 	}
770 	mtx_lock(&msq_mtx);
771 	user_msgp = (const char *)user_msgp + sizeof(msghdr->msg_type);
772 
773 	/*
774 	 * Validate the message type
775 	 */
776 
777 	if (msghdr->msg_type < 1) {
778 		msg_freehdr(msghdr);
779 		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
780 		wakeup(msqkptr);
781 		DPRINTF(("mtype (%d) < 1\n", msghdr->msg_type));
782 		error = EINVAL;
783 		goto done2;
784 	}
785 
786 	/*
787 	 * Copy in the message body
788 	 */
789 
790 	next = msghdr->msg_spot;
791 	while (msgsz > 0) {
792 		size_t tlen;
793 		if (msgsz > msginfo.msgssz)
794 			tlen = msginfo.msgssz;
795 		else
796 			tlen = msgsz;
797 		if (next <= -1)
798 			panic("next too low #2");
799 		if (next >= msginfo.msgseg)
800 			panic("next out of range #2");
801 		mtx_unlock(&msq_mtx);
802 		if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
803 		    tlen)) != 0) {
804 			mtx_lock(&msq_mtx);
805 			DPRINTF(("error %d copying in message segment\n",
806 			    error));
807 			msg_freehdr(msghdr);
808 			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
809 			wakeup(msqkptr);
810 			goto done2;
811 		}
812 		mtx_lock(&msq_mtx);
813 		msgsz -= tlen;
814 		user_msgp = (const char *)user_msgp + tlen;
815 		next = msgmaps[next].next;
816 	}
817 	if (next != -1)
818 		panic("didn't use all the msg segments");
819 
820 	/*
821 	 * We've got the message.  Unlock the msqid_ds.
822 	 */
823 
824 	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
825 
826 	/*
827 	 * Make sure that the msqid_ds is still allocated.
828 	 */
829 
830 	if (msqkptr->u.msg_qbytes == 0) {
831 		msg_freehdr(msghdr);
832 		wakeup(msqkptr);
833 		error = EIDRM;
834 		goto done2;
835 	}
836 
837 	/*
838 	 * Put the message into the queue
839 	 */
840 	if (msqkptr->u.msg_first == NULL) {
841 		msqkptr->u.msg_first = msghdr;
842 		msqkptr->u.msg_last = msghdr;
843 	} else {
844 		msqkptr->u.msg_last->msg_next = msghdr;
845 		msqkptr->u.msg_last = msghdr;
846 	}
847 	msqkptr->u.msg_last->msg_next = NULL;
848 
849 	msqkptr->u.msg_cbytes += msghdr->msg_ts;
850 	msqkptr->u.msg_qnum++;
851 	msqkptr->u.msg_lspid = td->td_proc->p_pid;
852 	msqkptr->u.msg_stime = time_second;
853 
854 	wakeup(msqkptr);
855 	td->td_retval[0] = 0;
856 done2:
857 	mtx_unlock(&msq_mtx);
858 	return (error);
859 }
860 
861 #ifndef _SYS_SYSPROTO_H_
862 struct msgrcv_args {
863 	int	msqid;
864 	void	*msgp;
865 	size_t	msgsz;
866 	long	msgtyp;
867 	int	msgflg;
868 };
869 #endif
870 
871 /*
872  * MPSAFE
873  */
874 int
875 msgrcv(td, uap)
876 	struct thread *td;
877 	register struct msgrcv_args *uap;
878 {
879 	int msqid = uap->msqid;
880 	void *user_msgp = uap->msgp;
881 	size_t msgsz = uap->msgsz;
882 	long msgtyp = uap->msgtyp;
883 	int msgflg = uap->msgflg;
884 	size_t len;
885 	register struct msqid_kernel *msqkptr;
886 	register struct msg *msghdr;
887 	int error = 0;
888 	short next;
889 
890 	DPRINTF(("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
891 	    msgsz, msgtyp, msgflg));
892 
893 	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
894 		return (ENOSYS);
895 
896 	msqid = IPCID_TO_IX(msqid);
897 
898 	if (msqid < 0 || msqid >= msginfo.msgmni) {
899 		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
900 		    msginfo.msgmni));
901 		return (EINVAL);
902 	}
903 
904 	msqkptr = &msqids[msqid];
905 	mtx_lock(&msq_mtx);
906 	if (msqkptr->u.msg_qbytes == 0) {
907 		DPRINTF(("no such message queue id\n"));
908 		error = EINVAL;
909 		goto done2;
910 	}
911 	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
912 		DPRINTF(("wrong sequence number\n"));
913 		error = EINVAL;
914 		goto done2;
915 	}
916 
917 	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
918 		DPRINTF(("requester doesn't have read access\n"));
919 		goto done2;
920 	}
921 
922 	msghdr = NULL;
923 	while (msghdr == NULL) {
924 		if (msgtyp == 0) {
925 			msghdr = msqkptr->u.msg_first;
926 			if (msghdr != NULL) {
927 				if (msgsz < msghdr->msg_ts &&
928 				    (msgflg & MSG_NOERROR) == 0) {
929 					DPRINTF(("first message on the queue "
930 					    "is too big (want %d, got %d)\n",
931 					    msgsz, msghdr->msg_ts));
932 					error = E2BIG;
933 					goto done2;
934 				}
935 				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
936 					msqkptr->u.msg_first = NULL;
937 					msqkptr->u.msg_last = NULL;
938 				} else {
939 					msqkptr->u.msg_first = msghdr->msg_next;
940 					if (msqkptr->u.msg_first == NULL)
941 						panic("msg_first/last screwed up #1");
942 				}
943 			}
944 		} else {
945 			struct msg *previous;
946 			struct msg **prev;
947 
948 			previous = NULL;
949 			prev = &(msqkptr->u.msg_first);
950 			while ((msghdr = *prev) != NULL) {
951 				/*
952 				 * Is this message's type an exact match or is
953 				 * this message's type less than or equal to
954 				 * the absolute value of a negative msgtyp?
955 				 * Note that the second half of this test can
956 				 * NEVER be true if msgtyp is positive since
957 				 * msg_type is always positive!
958 				 */
959 
960 				if (msgtyp == msghdr->msg_type ||
961 				    msghdr->msg_type <= -msgtyp) {
962 					DPRINTF(("found message type %d, "
963 					    "requested %d\n",
964 					    msghdr->msg_type, msgtyp));
965 					if (msgsz < msghdr->msg_ts &&
966 					    (msgflg & MSG_NOERROR) == 0) {
967 						DPRINTF(("requested message "
968 						    "on the queue is too big "
969 						    "(want %d, got %d)\n",
970 						    msgsz, msghdr->msg_ts));
971 						error = E2BIG;
972 						goto done2;
973 					}
974 					*prev = msghdr->msg_next;
975 					if (msghdr == msqkptr->u.msg_last) {
976 						if (previous == NULL) {
977 							if (prev !=
978 							    &msqkptr->u.msg_first)
979 								panic("msg_first/last screwed up #2");
980 							msqkptr->u.msg_first =
981 							    NULL;
982 							msqkptr->u.msg_last =
983 							    NULL;
984 						} else {
985 							if (prev ==
986 							    &msqkptr->u.msg_first)
987 								panic("msg_first/last screwed up #3");
988 							msqkptr->u.msg_last =
989 							    previous;
990 						}
991 					}
992 					break;
993 				}
994 				previous = msghdr;
995 				prev = &(msghdr->msg_next);
996 			}
997 		}
998 
999 		/*
1000 		 * We've either extracted the msghdr for the appropriate
1001 		 * message or there isn't one.
1002 		 * If there is one then bail out of this loop.
1003 		 */
1004 
1005 		if (msghdr != NULL)
1006 			break;
1007 
1008 		/*
1009 		 * Hmph!  No message found.  Does the user want to wait?
1010 		 */
1011 
1012 		if ((msgflg & IPC_NOWAIT) != 0) {
1013 			DPRINTF(("no appropriate message found (msgtyp=%d)\n",
1014 			    msgtyp));
1015 			/* The SVID says to return ENOMSG. */
1016 			error = ENOMSG;
1017 			goto done2;
1018 		}
1019 
1020 		/*
1021 		 * Wait for something to happen
1022 		 */
1023 
1024 		DPRINTF(("msgrcv:  goodnight\n"));
1025 		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
1026 		    "msgwait", 0);
1027 		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1028 
1029 		if (error != 0) {
1030 			DPRINTF(("msgsnd:  interrupted system call\n"));
1031 			error = EINTR;
1032 			goto done2;
1033 		}
1034 
1035 		/*
1036 		 * Make sure that the msq queue still exists
1037 		 */
1038 
1039 		if (msqkptr->u.msg_qbytes == 0 ||
1040 		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1041 			DPRINTF(("msqid deleted\n"));
1042 			error = EIDRM;
1043 			goto done2;
1044 		}
1045 	}
1046 
1047 	/*
1048 	 * Return the message to the user.
1049 	 *
1050 	 * First, do the bookkeeping (before we risk being interrupted).
1051 	 */
1052 
1053 	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
1054 	msqkptr->u.msg_qnum--;
1055 	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
1056 	msqkptr->u.msg_rtime = time_second;
1057 
1058 	/*
1059 	 * Make msgsz the actual amount that we'll be returning.
1060 	 * Note that this effectively truncates the message if it is too long
1061 	 * (since msgsz is never increased).
1062 	 */
1063 
1064 	DPRINTF(("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1065 	    msghdr->msg_ts));
1066 	if (msgsz > msghdr->msg_ts)
1067 		msgsz = msghdr->msg_ts;
1068 
1069 	/*
1070 	 * Return the type to the user.
1071 	 */
1072 
1073 	mtx_unlock(&msq_mtx);
1074 	error = copyout(&(msghdr->msg_type), user_msgp,
1075 	    sizeof(msghdr->msg_type));
1076 	mtx_lock(&msq_mtx);
1077 	if (error != 0) {
1078 		DPRINTF(("error (%d) copying out message type\n", error));
1079 		msg_freehdr(msghdr);
1080 		wakeup(msqkptr);
1081 		goto done2;
1082 	}
1083 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1084 
1085 	/*
1086 	 * Return the segments to the user
1087 	 */
1088 
1089 	next = msghdr->msg_spot;
1090 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1091 		size_t tlen;
1092 
1093 		if (msgsz - len > msginfo.msgssz)
1094 			tlen = msginfo.msgssz;
1095 		else
1096 			tlen = msgsz - len;
1097 		if (next <= -1)
1098 			panic("next too low #3");
1099 		if (next >= msginfo.msgseg)
1100 			panic("next out of range #3");
1101 		mtx_unlock(&msq_mtx);
1102 		error = copyout(&msgpool[next * msginfo.msgssz],
1103 		    user_msgp, tlen);
1104 		mtx_lock(&msq_mtx);
1105 		if (error != 0) {
1106 			DPRINTF(("error (%d) copying out message segment\n",
1107 			    error));
1108 			msg_freehdr(msghdr);
1109 			wakeup(msqkptr);
1110 			goto done2;
1111 		}
1112 		user_msgp = (char *)user_msgp + tlen;
1113 		next = msgmaps[next].next;
1114 	}
1115 
1116 	/*
1117 	 * Done, return the actual number of bytes copied out.
1118 	 */
1119 
1120 	msg_freehdr(msghdr);
1121 	wakeup(msqkptr);
1122 	td->td_retval[0] = msgsz;
1123 done2:
1124 	mtx_unlock(&msq_mtx);
1125 	return (error);
1126 }
1127 
1128 static int
1129 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1130 {
1131 
1132 	return (SYSCTL_OUT(req, msqids,
1133 	    sizeof(struct msqid_kernel) * msginfo.msgmni));
1134 }
1135 
1136 SYSCTL_DECL(_kern_ipc);
1137 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
1138 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0, "");
1139 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
1140 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
1141 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0, "");
1142 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0, "");
1143 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1144     NULL, 0, sysctl_msqids, "", "Message queue IDs");
1145