xref: /freebsd/sys/kern/sysv_msg.c (revision a220d00e74dd245b4fca59c5eca0c53963686325)
1 /* $FreeBSD$ */
2 
3 /*
4  * Implementation of SVID messages
5  *
6  * Author:  Daniel Boulet
7  *
8  * Copyright 1993 Daniel Boulet and RTMX Inc.
9  *
10  * This system call was implemented by Daniel Boulet under contract from RTMX.
11  *
12  * Redistribution and use in source forms, with and without modification,
13  * are permitted provided that this entire comment appears intact.
14  *
15  * Redistribution in binary form may occur without any restrictions.
16  * Obviously, it would be nice if you gave credit where credit is due
17  * but requiring it would be too onerous.
18  *
19  * This software is provided ``AS IS'' without any warranties of any kind.
20  */
21 
22 #include "opt_sysvipc.h"
23 
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/sysproto.h>
27 #include <sys/kernel.h>
28 #include <sys/proc.h>
29 #include <sys/lock.h>
30 #include <sys/mutex.h>
31 #include <sys/msg.h>
32 #include <sys/syscall.h>
33 #include <sys/sysent.h>
34 #include <sys/sysctl.h>
35 #include <sys/malloc.h>
36 #include <sys/jail.h>
37 
/* malloc(9) type used for every allocation made by msginit(). */
static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");

/* Module life-cycle helpers: set up / tear down the message pools. */
static void msginit __P((void));
static int msgunload __P((void));
static int sysvmsg_modload __P((struct module *, int, void *));

/*
 * The debug printf()s throughout this file are guarded by MSG_DEBUG_OK,
 * which is explicitly undefined here, so they are compiled out.
 */
#define MSG_DEBUG
#undef MSG_DEBUG_OK

/* Returns a message header and its segments to the free lists. */
static void msg_freehdr __P((struct msg *msghdr));

/*
 * Dispatch table used by msgsys(): the "which" argument indexes this
 * array (0 = msgctl, 1 = msgget, 2 = msgsnd, 3 = msgrcv).
 */
/* XXX casting to (sy_call_t *) is bogus, as usual. */
static sy_call_t *msgcalls[] = {
	(sy_call_t *)msgctl, (sy_call_t *)msgget,
	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
};
54 
/*
 * Header describing one queued message.  Headers live in the msghdrs
 * array and are linked either on a queue (msg_first .. msg_last) or on
 * the free_msghdrs list.
 */
struct msg {
	struct	msg *msg_next;	/* next msg in the chain */
	long	msg_type;	/* type of this message */
				/* >0 -> type of this message */
				/* 0 -> free header */
	u_short	msg_ts;		/* size of this message, in bytes */
	short	msg_spot;	/* index of the first segment of the message */
				/* in msgmaps/msgpool; -1 -> no segments */
};
63 
64 
/*
 * Compile-time defaults for the message subsystem.  Each one can be
 * overridden by defining the macro before this point; the values end up
 * in the msginfo structure below.
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#ifndef MSGSEG
#define MSGSEG	2048		/* msginit panics if msgseg exceeds 32767 */
#endif
#define MSGMAX	(MSGSSZ*MSGSEG)	/* total bytes in the segment pool */
#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#ifndef MSGTQL
#define MSGTQL	40		/* max # of message headers in the system */
#endif
81 
/*
 * Based on the configuration parameters described in an SVR2 (yes, two)
 * config(1m) man page.
 *
 * Each message is broken up and stored in segments that are msgssz bytes
 * long.  For efficiency reasons, this should be a power of two.  Also,
 * it doesn't make sense if it is less than 8 or greater than about 256.
 * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
 * two between 8 and 1024 inclusive (and panics if it isn't).
 *
 * msginit may overwrite msgseg, msgssz and msgmni from loader tunables
 * and recomputes msgmax as msgseg * msgssz.
 */
struct msginfo msginfo = {
                MSGMAX,         /* max chars in a message */
                MSGMNI,         /* # of message queue identifiers */
                MSGMNB,         /* max chars in a queue */
                MSGTQL,         /* max messages in system */
                MSGSSZ,         /* size of a message segment */
                		/* (must be small power of 2 greater than 4) */
                MSGSEG          /* number of message segments */
};
101 
/*
 * Macros to convert between msqid_ds's and msqid's.
 * (specific to this implementation)
 *
 * A msqid packs the slot index into the low 16 bits and the slot's
 * msg_perm.seq into the high 16 bits:
 *   MSQID(ix, ds)  - build an id from index ix and descriptor ds
 *   MSQID_IX(id)   - extract the slot index
 *   MSQID_SEQ(id)  - extract the sequence number
 *
 * The index term is parenthesized explicitly so the expression does not
 * depend on '&' binding tighter than '|'.
 */
#define MSQID(ix,ds)	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
109 
110 /*
111  * The rest of this file is specific to this particular implementation.
112  */
113 
/*
 * One entry per message segment.  Entries are chained through "next",
 * either as the segment list of a message (starting at msg_spot) or as
 * the free list headed by free_msgmaps.
 */
struct msgmap {
	short	next;		/* next segment in buffer */
				/* -1 -> end of chain (msginit also uses */
				/*       it to mark the last free entry) */
				/* 0..(MSGSEG-1) -> index of next segment */
};
119 
/*
 * Flag kept in msg_perm.mode (outside the 0777 permission bits) while
 * msgsnd holds the queue across a sleep or a copyin.
 */
#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */

/* Global state of the message subsystem; all of it is set up by msginit(). */
static int nfree_msgmaps;	/* # of free map entries */
static short free_msgmaps;	/* head of linked list of free map entries */
static struct msg *free_msghdrs;/* list of free msg headers */
static char *msgpool;		/* msginfo.msgmax byte long msg buffer pool */
static struct msgmap *msgmaps;	/* msginfo.msgseg msgmap structures */
static struct msg *msghdrs;	/* msginfo.msgtql msg headers */
static struct msqid_ds *msqids;	/* msginfo.msgmni msqid_ds struct's */
129 
130 static void
131 msginit()
132 {
133 	register int i;
134 
135 	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
136 	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
137 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
138 	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
139 
140 	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
141 	if (msgpool == NULL)
142 		panic("msgpool is NULL");
143 	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
144 	if (msgmaps == NULL)
145 		panic("msgmaps is NULL");
146 	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
147 	if (msghdrs == NULL)
148 		panic("msghdrs is NULL");
149 	msqids = malloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
150 	if (msqids == NULL)
151 		panic("msqids is NULL");
152 
153 	/*
154 	 * msginfo.msgssz should be a power of two for efficiency reasons.
155 	 * It is also pretty silly if msginfo.msgssz is less than 8
156 	 * or greater than about 256 so ...
157 	 */
158 
159 	i = 8;
160 	while (i < 1024 && i != msginfo.msgssz)
161 		i <<= 1;
162     	if (i != msginfo.msgssz) {
163 		printf("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
164 		    msginfo.msgssz);
165 		panic("msginfo.msgssz not a small power of 2");
166 	}
167 
168 	if (msginfo.msgseg > 32767) {
169 		printf("msginfo.msgseg=%d\n", msginfo.msgseg);
170 		panic("msginfo.msgseg > 32767");
171 	}
172 
173 	if (msgmaps == NULL)
174 		panic("msgmaps is NULL");
175 
176 	for (i = 0; i < msginfo.msgseg; i++) {
177 		if (i > 0)
178 			msgmaps[i-1].next = i;
179 		msgmaps[i].next = -1;	/* implies entry is available */
180 	}
181 	free_msgmaps = 0;
182 	nfree_msgmaps = msginfo.msgseg;
183 
184 	if (msghdrs == NULL)
185 		panic("msghdrs is NULL");
186 
187 	for (i = 0; i < msginfo.msgtql; i++) {
188 		msghdrs[i].msg_type = 0;
189 		if (i > 0)
190 			msghdrs[i-1].msg_next = &msghdrs[i];
191 		msghdrs[i].msg_next = NULL;
192     	}
193 	free_msghdrs = &msghdrs[0];
194 
195 	if (msqids == NULL)
196 		panic("msqids is NULL");
197 
198 	for (i = 0; i < msginfo.msgmni; i++) {
199 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
200 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
201 		msqids[i].msg_perm.mode = 0;
202 	}
203 }
204 
205 static int
206 msgunload()
207 {
208 	struct msqid_ds *msqptr;
209 	int msqid;
210 
211 	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
212 		/*
213 		 * Look for an unallocated and unlocked msqid_ds.
214 		 * msqid_ds's can be locked by msgsnd or msgrcv while
215 		 * they are copying the message in/out.  We can't
216 		 * re-use the entry until they release it.
217 		 */
218 		msqptr = &msqids[msqid];
219 		if (msqptr->msg_qbytes != 0 ||
220 		    (msqptr->msg_perm.mode & MSG_LOCKED) != 0)
221 			break;
222 	}
223 	if (msqid != msginfo.msgmni)
224 		return (EBUSY);
225 
226 	free(msgpool, M_MSG);
227 	free(msgmaps, M_MSG);
228 	free(msghdrs, M_MSG);
229 	free(msqids, M_MSG);
230 	return (0);
231 }
232 
233 
234 static int
235 sysvmsg_modload(struct module *module, int cmd, void *arg)
236 {
237 	int error = 0;
238 
239 	switch (cmd) {
240 	case MOD_LOAD:
241 		msginit();
242 		break;
243 	case MOD_UNLOAD:
244 		error = msgunload();
245 		break;
246 	case MOD_SHUTDOWN:
247 		break;
248 	default:
249 		error = EINVAL;
250 		break;
251 	}
252 	return (error);
253 }
254 
/* Module description: name, event handler, and no extra argument. */
static moduledata_t sysvmsg_mod = {
	"sysvmsg",
	&sysvmsg_modload,
	NULL
};

/*
 * One helper per system call implemented in this file.  The second
 * argument equals the number of fields in the matching *_args structure
 * (presumably the syscall's argument count -- confirm against the
 * SYSCALL_MODULE_HELPER definition).
 */
SYSCALL_MODULE_HELPER(msgsys, 6);
SYSCALL_MODULE_HELPER(msgctl, 3);
SYSCALL_MODULE_HELPER(msgget, 2);
SYSCALL_MODULE_HELPER(msgsnd, 4);
SYSCALL_MODULE_HELPER(msgrcv, 5);

/* Register the module with the kernel and publish its version. */
DECLARE_MODULE(sysvmsg, sysvmsg_mod,
	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
MODULE_VERSION(sysvmsg, 1);
270 
271 /*
272  * Entry point for all MSG calls
273  *
274  * MPSAFE
275  */
276 int
277 msgsys(td, uap)
278 	struct thread *td;
279 	/* XXX actually varargs. */
280 	struct msgsys_args /* {
281 		u_int	which;
282 		int	a2;
283 		int	a3;
284 		int	a4;
285 		int	a5;
286 		int	a6;
287 	} */ *uap;
288 {
289 	int error;
290 
291 	mtx_lock(&Giant);
292 	if (!jail_sysvipc_allowed && jailed(td->td_proc->p_ucred)) {
293 		error = ENOSYS;
294 		goto done2;
295 	}
296 	if (uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0])) {
297 		error = EINVAL;
298 		goto done2;
299 	}
300 	error = (*msgcalls[uap->which])(td, &uap->a2);
301 done2:
302 	mtx_unlock(&Giant);
303 	return (error);
304 }
305 
306 static void
307 msg_freehdr(msghdr)
308 	struct msg *msghdr;
309 {
310 	while (msghdr->msg_ts > 0) {
311 		short next;
312 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
313 			panic("msghdr->msg_spot out of range");
314 		next = msgmaps[msghdr->msg_spot].next;
315 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
316 		free_msgmaps = msghdr->msg_spot;
317 		nfree_msgmaps++;
318 		msghdr->msg_spot = next;
319 		if (msghdr->msg_ts >= msginfo.msgssz)
320 			msghdr->msg_ts -= msginfo.msgssz;
321 		else
322 			msghdr->msg_ts = 0;
323 	}
324 	if (msghdr->msg_spot != -1)
325 		panic("msghdr->msg_spot != -1");
326 	msghdr->msg_next = free_msghdrs;
327 	free_msghdrs = msghdr;
328 }
329 
330 #ifndef _SYS_SYSPROTO_H_
331 struct msgctl_args {
332 	int	msqid;
333 	int	cmd;
334 	struct	msqid_ds *buf;
335 };
336 #endif
337 
338 /*
339  * MPSAFE
340  */
341 int
342 msgctl(td, uap)
343 	struct thread *td;
344 	register struct msgctl_args *uap;
345 {
346 	int msqid = uap->msqid;
347 	int cmd = uap->cmd;
348 	struct msqid_ds *user_msqptr = uap->buf;
349 	int rval, error;
350 	struct msqid_ds msqbuf;
351 	register struct msqid_ds *msqptr;
352 
353 #ifdef MSG_DEBUG_OK
354 	printf("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr);
355 #endif
356 	mtx_lock(&Giant);
357 	if (!jail_sysvipc_allowed && jailed(td->td_proc->p_ucred)) {
358 		error = ENOSYS;
359 		goto done2;
360 	}
361 
362 	msqid = IPCID_TO_IX(msqid);
363 
364 	if (msqid < 0 || msqid >= msginfo.msgmni) {
365 #ifdef MSG_DEBUG_OK
366 		printf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
367 		    msginfo.msgmni);
368 #endif
369 		error = EINVAL;
370 		goto done2;
371 	}
372 
373 	msqptr = &msqids[msqid];
374 
375 	if (msqptr->msg_qbytes == 0) {
376 #ifdef MSG_DEBUG_OK
377 		printf("no such msqid\n");
378 #endif
379 		error = EINVAL;
380 		goto done2;
381 	}
382 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
383 #ifdef MSG_DEBUG_OK
384 		printf("wrong sequence number\n");
385 #endif
386 		error = EINVAL;
387 		goto done2;
388 	}
389 
390 	error = 0;
391 	rval = 0;
392 
393 	switch (cmd) {
394 
395 	case IPC_RMID:
396 	{
397 		struct msg *msghdr;
398 		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
399 			goto done2;
400 		/* Free the message headers */
401 		msghdr = msqptr->msg_first;
402 		while (msghdr != NULL) {
403 			struct msg *msghdr_tmp;
404 
405 			/* Free the segments of each message */
406 			msqptr->msg_cbytes -= msghdr->msg_ts;
407 			msqptr->msg_qnum--;
408 			msghdr_tmp = msghdr;
409 			msghdr = msghdr->msg_next;
410 			msg_freehdr(msghdr_tmp);
411 		}
412 
413 		if (msqptr->msg_cbytes != 0)
414 			panic("msg_cbytes is screwed up");
415 		if (msqptr->msg_qnum != 0)
416 			panic("msg_qnum is screwed up");
417 
418 		msqptr->msg_qbytes = 0;	/* Mark it as free */
419 
420 		wakeup((caddr_t)msqptr);
421 	}
422 
423 		break;
424 
425 	case IPC_SET:
426 		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
427 			goto done2;
428 		if ((error = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
429 			goto done2;
430 		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
431 			error = suser_td(td);
432 			if (error)
433 				goto done2;
434 		}
435 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
436 #ifdef MSG_DEBUG_OK
437 			printf("can't increase msg_qbytes beyond %d (truncating)\n",
438 			    msginfo.msgmnb);
439 #endif
440 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
441 		}
442 		if (msqbuf.msg_qbytes == 0) {
443 #ifdef MSG_DEBUG_OK
444 			printf("can't reduce msg_qbytes to 0\n");
445 #endif
446 			error = EINVAL;		/* non-standard errno! */
447 			goto done2;
448 		}
449 		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
450 		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
451 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
452 		    (msqbuf.msg_perm.mode & 0777);
453 		msqptr->msg_qbytes = msqbuf.msg_qbytes;
454 		msqptr->msg_ctime = time_second;
455 		break;
456 
457 	case IPC_STAT:
458 		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
459 #ifdef MSG_DEBUG_OK
460 			printf("requester doesn't have read access\n");
461 #endif
462 			goto done2;
463 		}
464 		error = copyout((caddr_t)msqptr, user_msqptr,
465 		    sizeof(struct msqid_ds));
466 		break;
467 
468 	default:
469 #ifdef MSG_DEBUG_OK
470 		printf("invalid command %d\n", cmd);
471 #endif
472 		error = EINVAL;
473 		goto done2;
474 	}
475 
476 	if (error == 0)
477 		td->td_retval[0] = rval;
478 done2:
479 	mtx_unlock(&Giant);
480 	return(error);
481 }
482 
483 #ifndef _SYS_SYSPROTO_H_
484 struct msgget_args {
485 	key_t	key;
486 	int	msgflg;
487 };
488 #endif
489 
490 /*
491  * MPSAFE
492  */
493 int
494 msgget(td, uap)
495 	struct thread *td;
496 	register struct msgget_args *uap;
497 {
498 	int msqid, error = 0;
499 	int key = uap->key;
500 	int msgflg = uap->msgflg;
501 	struct ucred *cred = td->td_proc->p_ucred;
502 	register struct msqid_ds *msqptr = NULL;
503 
504 #ifdef MSG_DEBUG_OK
505 	printf("msgget(0x%x, 0%o)\n", key, msgflg);
506 #endif
507 
508 	mtx_lock(&Giant);
509 	if (!jail_sysvipc_allowed && jailed(td->td_proc->p_ucred)) {
510 		error = ENOSYS;
511 		goto done2;
512 	}
513 
514 	if (key != IPC_PRIVATE) {
515 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
516 			msqptr = &msqids[msqid];
517 			if (msqptr->msg_qbytes != 0 &&
518 			    msqptr->msg_perm.key == key)
519 				break;
520 		}
521 		if (msqid < msginfo.msgmni) {
522 #ifdef MSG_DEBUG_OK
523 			printf("found public key\n");
524 #endif
525 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
526 #ifdef MSG_DEBUG_OK
527 				printf("not exclusive\n");
528 #endif
529 				error = EEXIST;
530 				goto done2;
531 			}
532 			if ((error = ipcperm(td, &msqptr->msg_perm, msgflg & 0700 ))) {
533 #ifdef MSG_DEBUG_OK
534 				printf("requester doesn't have 0%o access\n",
535 				    msgflg & 0700);
536 #endif
537 				goto done2;
538 			}
539 			goto found;
540 		}
541 	}
542 
543 #ifdef MSG_DEBUG_OK
544 	printf("need to allocate the msqid_ds\n");
545 #endif
546 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
547 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
548 			/*
549 			 * Look for an unallocated and unlocked msqid_ds.
550 			 * msqid_ds's can be locked by msgsnd or msgrcv while
551 			 * they are copying the message in/out.  We can't
552 			 * re-use the entry until they release it.
553 			 */
554 			msqptr = &msqids[msqid];
555 			if (msqptr->msg_qbytes == 0 &&
556 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
557 				break;
558 		}
559 		if (msqid == msginfo.msgmni) {
560 #ifdef MSG_DEBUG_OK
561 			printf("no more msqid_ds's available\n");
562 #endif
563 			error = ENOSPC;
564 			goto done2;
565 		}
566 #ifdef MSG_DEBUG_OK
567 		printf("msqid %d is available\n", msqid);
568 #endif
569 		msqptr->msg_perm.key = key;
570 		msqptr->msg_perm.cuid = cred->cr_uid;
571 		msqptr->msg_perm.uid = cred->cr_uid;
572 		msqptr->msg_perm.cgid = cred->cr_gid;
573 		msqptr->msg_perm.gid = cred->cr_gid;
574 		msqptr->msg_perm.mode = (msgflg & 0777);
575 		/* Make sure that the returned msqid is unique */
576 		msqptr->msg_perm.seq++;
577 		msqptr->msg_first = NULL;
578 		msqptr->msg_last = NULL;
579 		msqptr->msg_cbytes = 0;
580 		msqptr->msg_qnum = 0;
581 		msqptr->msg_qbytes = msginfo.msgmnb;
582 		msqptr->msg_lspid = 0;
583 		msqptr->msg_lrpid = 0;
584 		msqptr->msg_stime = 0;
585 		msqptr->msg_rtime = 0;
586 		msqptr->msg_ctime = time_second;
587 	} else {
588 #ifdef MSG_DEBUG_OK
589 		printf("didn't find it and wasn't asked to create it\n");
590 #endif
591 		error = ENOENT;
592 		goto done2;
593 	}
594 
595 found:
596 	/* Construct the unique msqid */
597 	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
598 done2:
599 	mtx_unlock(&Giant);
600 	return (error);
601 }
602 
603 #ifndef _SYS_SYSPROTO_H_
604 struct msgsnd_args {
605 	int	msqid;
606 	void	*msgp;
607 	size_t	msgsz;
608 	int	msgflg;
609 };
610 #endif
611 
612 /*
613  * MPSAFE
614  */
615 int
616 msgsnd(td, uap)
617 	struct thread *td;
618 	register struct msgsnd_args *uap;
619 {
620 	int msqid = uap->msqid;
621 	void *user_msgp = uap->msgp;
622 	size_t msgsz = uap->msgsz;
623 	int msgflg = uap->msgflg;
624 	int segs_needed, error = 0;
625 	register struct msqid_ds *msqptr;
626 	register struct msg *msghdr;
627 	short next;
628 
629 #ifdef MSG_DEBUG_OK
630 	printf("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
631 	    msgflg);
632 #endif
633 	mtx_lock(&Giant);
634 	if (!jail_sysvipc_allowed && jailed(td->td_proc->p_ucred)) {
635 		error = ENOSYS;
636 		goto done2;
637 	}
638 
639 	msqid = IPCID_TO_IX(msqid);
640 
641 	if (msqid < 0 || msqid >= msginfo.msgmni) {
642 #ifdef MSG_DEBUG_OK
643 		printf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
644 		    msginfo.msgmni);
645 #endif
646 		error = EINVAL;
647 		goto done2;
648 	}
649 
650 	msqptr = &msqids[msqid];
651 	if (msqptr->msg_qbytes == 0) {
652 #ifdef MSG_DEBUG_OK
653 		printf("no such message queue id\n");
654 #endif
655 		error = EINVAL;
656 		goto done2;
657 	}
658 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
659 #ifdef MSG_DEBUG_OK
660 		printf("wrong sequence number\n");
661 #endif
662 		error = EINVAL;
663 		goto done2;
664 	}
665 
666 	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_W))) {
667 #ifdef MSG_DEBUG_OK
668 		printf("requester doesn't have write access\n");
669 #endif
670 		goto done2;
671 	}
672 
673 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
674 #ifdef MSG_DEBUG_OK
675 	printf("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
676 	    segs_needed);
677 #endif
678 	for (;;) {
679 		int need_more_resources = 0;
680 
681 		/*
682 		 * check msgsz
683 		 * (inside this loop in case msg_qbytes changes while we sleep)
684 		 */
685 
686 		if (msgsz > msqptr->msg_qbytes) {
687 #ifdef MSG_DEBUG_OK
688 			printf("msgsz > msqptr->msg_qbytes\n");
689 #endif
690 			error = EINVAL;
691 			goto done2;
692 		}
693 
694 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
695 #ifdef MSG_DEBUG_OK
696 			printf("msqid is locked\n");
697 #endif
698 			need_more_resources = 1;
699 		}
700 		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
701 #ifdef MSG_DEBUG_OK
702 			printf("msgsz + msg_cbytes > msg_qbytes\n");
703 #endif
704 			need_more_resources = 1;
705 		}
706 		if (segs_needed > nfree_msgmaps) {
707 #ifdef MSG_DEBUG_OK
708 			printf("segs_needed > nfree_msgmaps\n");
709 #endif
710 			need_more_resources = 1;
711 		}
712 		if (free_msghdrs == NULL) {
713 #ifdef MSG_DEBUG_OK
714 			printf("no more msghdrs\n");
715 #endif
716 			need_more_resources = 1;
717 		}
718 
719 		if (need_more_resources) {
720 			int we_own_it;
721 
722 			if ((msgflg & IPC_NOWAIT) != 0) {
723 #ifdef MSG_DEBUG_OK
724 				printf("need more resources but caller doesn't want to wait\n");
725 #endif
726 				error = EAGAIN;
727 				goto done2;
728 			}
729 
730 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
731 #ifdef MSG_DEBUG_OK
732 				printf("we don't own the msqid_ds\n");
733 #endif
734 				we_own_it = 0;
735 			} else {
736 				/* Force later arrivals to wait for our
737 				   request */
738 #ifdef MSG_DEBUG_OK
739 				printf("we own the msqid_ds\n");
740 #endif
741 				msqptr->msg_perm.mode |= MSG_LOCKED;
742 				we_own_it = 1;
743 			}
744 #ifdef MSG_DEBUG_OK
745 			printf("goodnight\n");
746 #endif
747 			error = tsleep((caddr_t)msqptr, (PZERO - 4) | PCATCH,
748 			    "msgwait", 0);
749 #ifdef MSG_DEBUG_OK
750 			printf("good morning, error=%d\n", error);
751 #endif
752 			if (we_own_it)
753 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
754 			if (error != 0) {
755 #ifdef MSG_DEBUG_OK
756 				printf("msgsnd:  interrupted system call\n");
757 #endif
758 				error = EINTR;
759 				goto done2;
760 			}
761 
762 			/*
763 			 * Make sure that the msq queue still exists
764 			 */
765 
766 			if (msqptr->msg_qbytes == 0) {
767 #ifdef MSG_DEBUG_OK
768 				printf("msqid deleted\n");
769 #endif
770 				error = EIDRM;
771 				goto done2;
772 			}
773 
774 		} else {
775 #ifdef MSG_DEBUG_OK
776 			printf("got all the resources that we need\n");
777 #endif
778 			break;
779 		}
780 	}
781 
782 	/*
783 	 * We have the resources that we need.
784 	 * Make sure!
785 	 */
786 
787 	if (msqptr->msg_perm.mode & MSG_LOCKED)
788 		panic("msg_perm.mode & MSG_LOCKED");
789 	if (segs_needed > nfree_msgmaps)
790 		panic("segs_needed > nfree_msgmaps");
791 	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
792 		panic("msgsz + msg_cbytes > msg_qbytes");
793 	if (free_msghdrs == NULL)
794 		panic("no more msghdrs");
795 
796 	/*
797 	 * Re-lock the msqid_ds in case we page-fault when copying in the
798 	 * message
799 	 */
800 
801 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
802 		panic("msqid_ds is already locked");
803 	msqptr->msg_perm.mode |= MSG_LOCKED;
804 
805 	/*
806 	 * Allocate a message header
807 	 */
808 
809 	msghdr = free_msghdrs;
810 	free_msghdrs = msghdr->msg_next;
811 	msghdr->msg_spot = -1;
812 	msghdr->msg_ts = msgsz;
813 
814 	/*
815 	 * Allocate space for the message
816 	 */
817 
818 	while (segs_needed > 0) {
819 		if (nfree_msgmaps <= 0)
820 			panic("not enough msgmaps");
821 		if (free_msgmaps == -1)
822 			panic("nil free_msgmaps");
823 		next = free_msgmaps;
824 		if (next <= -1)
825 			panic("next too low #1");
826 		if (next >= msginfo.msgseg)
827 			panic("next out of range #1");
828 #ifdef MSG_DEBUG_OK
829 		printf("allocating segment %d to message\n", next);
830 #endif
831 		free_msgmaps = msgmaps[next].next;
832 		nfree_msgmaps--;
833 		msgmaps[next].next = msghdr->msg_spot;
834 		msghdr->msg_spot = next;
835 		segs_needed--;
836 	}
837 
838 	/*
839 	 * Copy in the message type
840 	 */
841 
842 	if ((error = copyin(user_msgp, &msghdr->msg_type,
843 	    sizeof(msghdr->msg_type))) != 0) {
844 #ifdef MSG_DEBUG_OK
845 		printf("error %d copying the message type\n", error);
846 #endif
847 		msg_freehdr(msghdr);
848 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
849 		wakeup((caddr_t)msqptr);
850 		goto done2;
851 	}
852 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
853 
854 	/*
855 	 * Validate the message type
856 	 */
857 
858 	if (msghdr->msg_type < 1) {
859 		msg_freehdr(msghdr);
860 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
861 		wakeup((caddr_t)msqptr);
862 #ifdef MSG_DEBUG_OK
863 		printf("mtype (%d) < 1\n", msghdr->msg_type);
864 #endif
865 		error = EINVAL;
866 		goto done2;
867 	}
868 
869 	/*
870 	 * Copy in the message body
871 	 */
872 
873 	next = msghdr->msg_spot;
874 	while (msgsz > 0) {
875 		size_t tlen;
876 		if (msgsz > msginfo.msgssz)
877 			tlen = msginfo.msgssz;
878 		else
879 			tlen = msgsz;
880 		if (next <= -1)
881 			panic("next too low #2");
882 		if (next >= msginfo.msgseg)
883 			panic("next out of range #2");
884 		if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
885 		    tlen)) != 0) {
886 #ifdef MSG_DEBUG_OK
887 			printf("error %d copying in message segment\n", error);
888 #endif
889 			msg_freehdr(msghdr);
890 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
891 			wakeup((caddr_t)msqptr);
892 			goto done2;
893 		}
894 		msgsz -= tlen;
895 		user_msgp = (char *)user_msgp + tlen;
896 		next = msgmaps[next].next;
897 	}
898 	if (next != -1)
899 		panic("didn't use all the msg segments");
900 
901 	/*
902 	 * We've got the message.  Unlock the msqid_ds.
903 	 */
904 
905 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
906 
907 	/*
908 	 * Make sure that the msqid_ds is still allocated.
909 	 */
910 
911 	if (msqptr->msg_qbytes == 0) {
912 		msg_freehdr(msghdr);
913 		wakeup((caddr_t)msqptr);
914 		error = EIDRM;
915 		goto done2;
916 	}
917 
918 	/*
919 	 * Put the message into the queue
920 	 */
921 
922 	if (msqptr->msg_first == NULL) {
923 		msqptr->msg_first = msghdr;
924 		msqptr->msg_last = msghdr;
925 	} else {
926 		msqptr->msg_last->msg_next = msghdr;
927 		msqptr->msg_last = msghdr;
928 	}
929 	msqptr->msg_last->msg_next = NULL;
930 
931 	msqptr->msg_cbytes += msghdr->msg_ts;
932 	msqptr->msg_qnum++;
933 	msqptr->msg_lspid = td->td_proc->p_pid;
934 	msqptr->msg_stime = time_second;
935 
936 	wakeup((caddr_t)msqptr);
937 	td->td_retval[0] = 0;
938 done2:
939 	mtx_unlock(&Giant);
940 	return (error);
941 }
942 
#ifndef _SYS_SYSPROTO_H_
struct msgrcv_args {
	int	msqid;
	void	*msgp;
	size_t	msgsz;
	long	msgtyp;
	int	msgflg;
};
#endif

/*
 * Remove a message from a queue and copy it to the caller.
 *
 * msgtyp selects the message: 0 takes the first message on the queue,
 * a positive value takes the first message of exactly that type, and a
 * negative value takes the first message whose type is <= -msgtyp.
 * If no message qualifies the call sleeps, unless IPC_NOWAIT is set, in
 * which case it fails with ENOMSG.  A message longer than msgsz yields
 * E2BIG unless MSG_NOERROR is set, in which case it is truncated.
 *
 * The user buffer receives the long message type followed by the
 * payload.  On success the syscall return value is the number of
 * payload bytes copied out.  Note that once a message has been unlinked
 * it is freed even if a copyout fails.
 *
 * MPSAFE
 */
int
msgrcv(td, uap)
	struct thread *td;
	register struct msgrcv_args *uap;
{
	int msqid = uap->msqid;
	void *user_msgp = uap->msgp;
	size_t msgsz = uap->msgsz;
	long msgtyp = uap->msgtyp;
	int msgflg = uap->msgflg;
	size_t len;
	register struct msqid_ds *msqptr;
	register struct msg *msghdr;
	int error = 0;
	short next;

#ifdef MSG_DEBUG_OK
	printf("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
	    msgsz, msgtyp, msgflg);
#endif

	mtx_lock(&Giant);
	if (!jail_sysvipc_allowed && jailed(td->td_proc->p_ucred)) {
		error = ENOSYS;
		goto done2;
	}

	/* From here on msqid is the slot index; uap->msqid keeps the id. */
	msqid = IPCID_TO_IX(msqid);

	if (msqid < 0 || msqid >= msginfo.msgmni) {
#ifdef MSG_DEBUG_OK
		printf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
		    msginfo.msgmni);
#endif
		error = EINVAL;
		goto done2;
	}

	msqptr = &msqids[msqid];
	if (msqptr->msg_qbytes == 0) {
#ifdef MSG_DEBUG_OK
		printf("no such message queue id\n");
#endif
		error = EINVAL;
		goto done2;
	}
	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
#ifdef MSG_DEBUG_OK
		printf("wrong sequence number\n");
#endif
		error = EINVAL;
		goto done2;
	}

	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
#ifdef MSG_DEBUG_OK
		printf("requester doesn't have read access\n");
#endif
		goto done2;
	}

	/* Loop until a suitable message has been unlinked from the queue. */
	msghdr = NULL;
	while (msghdr == NULL) {
		if (msgtyp == 0) {
			/* Any type: take the head of the queue. */
			msghdr = msqptr->msg_first;
			if (msghdr != NULL) {
				if (msgsz < msghdr->msg_ts &&
				    (msgflg & MSG_NOERROR) == 0) {
#ifdef MSG_DEBUG_OK
					printf("first message on the queue is too big (want %d, got %d)\n",
					    msgsz, msghdr->msg_ts);
#endif
					/* The message stays on the queue. */
					error = E2BIG;
					goto done2;
				}
				if (msqptr->msg_first == msqptr->msg_last) {
					/* It was the only message. */
					msqptr->msg_first = NULL;
					msqptr->msg_last = NULL;
				} else {
					msqptr->msg_first = msghdr->msg_next;
					if (msqptr->msg_first == NULL)
						panic("msg_first/last screwed up #1");
				}
			}
		} else {
			struct msg *previous;	/* header before msghdr, if any */
			struct msg **prev;	/* link that points at msghdr */

			previous = NULL;
			prev = &(msqptr->msg_first);
			while ((msghdr = *prev) != NULL) {
				/*
				 * Is this message's type an exact match or is
				 * this message's type less than or equal to
				 * the absolute value of a negative msgtyp?
				 * Note that the second half of this test can
				 * NEVER be true if msgtyp is positive since
				 * msg_type is always positive!
				 */

				if (msgtyp == msghdr->msg_type ||
				    msghdr->msg_type <= -msgtyp) {
#ifdef MSG_DEBUG_OK
					printf("found message type %d, requested %d\n",
					    msghdr->msg_type, msgtyp);
#endif
					if (msgsz < msghdr->msg_ts &&
					    (msgflg & MSG_NOERROR) == 0) {
#ifdef MSG_DEBUG_OK
						printf("requested message on the queue is too big (want %d, got %d)\n",
						    msgsz, msghdr->msg_ts);
#endif
						/* The message stays on the queue. */
						error = E2BIG;
						goto done2;
					}
					/* Unlink msghdr from the chain. */
					*prev = msghdr->msg_next;
					if (msghdr == msqptr->msg_last) {
						/* Removed the tail: fix up msg_last. */
						if (previous == NULL) {
							if (prev !=
							    &msqptr->msg_first)
								panic("msg_first/last screwed up #2");
							msqptr->msg_first =
							    NULL;
							msqptr->msg_last =
							    NULL;
						} else {
							if (prev ==
							    &msqptr->msg_first)
								panic("msg_first/last screwed up #3");
							msqptr->msg_last =
							    previous;
						}
					}
					break;
				}
				previous = msghdr;
				prev = &(msghdr->msg_next);
			}
		}

		/*
		 * We've either extracted the msghdr for the appropriate
		 * message or there isn't one.
		 * If there is one then bail out of this loop.
		 */

		if (msghdr != NULL)
			break;

		/*
		 * Hmph!  No message found.  Does the user want to wait?
		 */

		if ((msgflg & IPC_NOWAIT) != 0) {
#ifdef MSG_DEBUG_OK
			printf("no appropriate message found (msgtyp=%d)\n",
			    msgtyp);
#endif
			/* The SVID says to return ENOMSG. */
			error = ENOMSG;
			goto done2;
		}

		/*
		 * Wait for something to happen
		 */

#ifdef MSG_DEBUG_OK
		printf("msgrcv:  goodnight\n");
#endif
		error = tsleep((caddr_t)msqptr, (PZERO - 4) | PCATCH, "msgwait",
		    0);
#ifdef MSG_DEBUG_OK
		printf("msgrcv:  good morning (error=%d)\n", error);
#endif

		/* Any tsleep failure is reported to the caller as EINTR. */
		if (error != 0) {
#ifdef MSG_DEBUG_OK
			/* NOTE(review): this debug text says "msgsnd" but we are in msgrcv. */
			printf("msgsnd:  interrupted system call\n");
#endif
			error = EINTR;
			goto done2;
		}

		/*
		 * Make sure that the msq queue still exists: the slot must
		 * still be allocated and must still carry the sequence
		 * number of the id we were called with.
		 */

		if (msqptr->msg_qbytes == 0 ||
		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
#ifdef MSG_DEBUG_OK
			printf("msqid deleted\n");
#endif
			error = EIDRM;
			goto done2;
		}
	}

	/*
	 * Return the message to the user.
	 *
	 * First, do the bookkeeping (before we risk being interrupted).
	 */

	msqptr->msg_cbytes -= msghdr->msg_ts;
	msqptr->msg_qnum--;
	msqptr->msg_lrpid = td->td_proc->p_pid;
	msqptr->msg_rtime = time_second;

	/*
	 * Make msgsz the actual amount that we'll be returning.
	 * Note that this effectively truncates the message if it is too long
	 * (since msgsz is never increased).
	 */

#ifdef MSG_DEBUG_OK
	printf("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
	    msghdr->msg_ts);
#endif
	if (msgsz > msghdr->msg_ts)
		msgsz = msghdr->msg_ts;

	/*
	 * Return the type to the user.
	 */

	error = copyout((caddr_t)&(msghdr->msg_type), user_msgp,
	    sizeof(msghdr->msg_type));
	if (error != 0) {
#ifdef MSG_DEBUG_OK
		printf("error (%d) copying out message type\n", error);
#endif
		/* The message is already unlinked, so it is discarded. */
		msg_freehdr(msghdr);
		wakeup((caddr_t)msqptr);
		goto done2;
	}
	/* The payload follows the type in the user buffer. */
	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);

	/*
	 * Return the segments to the user
	 */

	next = msghdr->msg_spot;
	for (len = 0; len < msgsz; len += msginfo.msgssz) {
		size_t tlen;

		/* Full segment, except possibly for the last one. */
		if (msgsz - len > msginfo.msgssz)
			tlen = msginfo.msgssz;
		else
			tlen = msgsz - len;
		if (next <= -1)
			panic("next too low #3");
		if (next >= msginfo.msgseg)
			panic("next out of range #3");
		error = copyout((caddr_t)&msgpool[next * msginfo.msgssz],
		    user_msgp, tlen);
		if (error != 0) {
#ifdef MSG_DEBUG_OK
			printf("error (%d) copying out message segment\n",
			    error);
#endif
			/* The message is already unlinked, so it is discarded. */
			msg_freehdr(msghdr);
			wakeup((caddr_t)msqptr);
			goto done2;
		}
		user_msgp = (char *)user_msgp + tlen;
		next = msgmaps[next].next;
	}

	/*
	 * Done, return the actual number of bytes copied out.
	 * Freeing the message releases segments and a header, so wake
	 * anyone sleeping on this queue.
	 */

	msg_freehdr(msghdr);
	wakeup((caddr_t)msqptr);
	td->td_retval[0] = msgsz;
done2:
	mtx_unlock(&Giant);
	return (error);
}
1236 
1237 static int
1238 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1239 {
1240 
1241 	return (SYSCTL_OUT(req, msqids,
1242 	    sizeof(struct msqid_ds) * msginfo.msgmni));
1243 }
1244 
1245 SYSCTL_DECL(_kern_ipc);
1246 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
1247 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0, "");
1248 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
1249 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
1250 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0, "");
1251 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0, "")
1252 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1253     NULL, 0, sysctl_msqids, "", "Message queue IDs");
1254