xref: /titanic_51/usr/src/uts/common/os/msg.c (revision 98579b20de8e05c5117968705a18979f8b75b863)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  * Inter-Process Communication Message Facility.
34  *
35  * See os/ipc.c for a description of common IPC functionality.
36  *
37  * Resource controls
38  * -----------------
39  *
40  * Control:      project.max-msg-ids (rc_project_msgmni)
41  * Description:  Maximum number of message queue ids allowed a project.
42  *
43  *   When msgget() is used to allocate a message queue, one id is
44  *   allocated.  If the id allocation doesn't succeed, msgget() fails
45  *   and errno is set to ENOSPC.  Upon successful msgctl(, IPC_RMID)
46  *   the id is deallocated.
47  *
48  * Control:      process.max-msg-qbytes (rc_process_msgmnb)
49  * Description:  Maximum number of bytes of messages on a message queue.
50  *
51  *   When msgget() successfully allocates a message queue, the minimum
52  *   enforced value of this limit is used to initialize msg_qbytes.
53  *
54  * Control:      process.max-msg-messages (rc_process_msgtql)
55  * Description:  Maximum number of messages on a message queue.
56  *
57  *   When msgget() successfully allocates a message queue, the minimum
58  *   enforced value of this limit is used to initialize a per-queue
59  *   limit on the number of messages.
60  */
61 
62 #include <sys/types.h>
63 #include <sys/t_lock.h>
64 #include <sys/param.h>
65 #include <sys/cred.h>
66 #include <sys/user.h>
67 #include <sys/proc.h>
68 #include <sys/time.h>
69 #include <sys/ipc.h>
70 #include <sys/ipc_impl.h>
71 #include <sys/msg.h>
72 #include <sys/msg_impl.h>
73 #include <sys/list.h>
74 #include <sys/systm.h>
75 #include <sys/sysmacros.h>
76 #include <sys/cpuvar.h>
77 #include <sys/kmem.h>
78 #include <sys/ddi.h>
79 #include <sys/errno.h>
80 #include <sys/cmn_err.h>
81 #include <sys/debug.h>
82 #include <sys/project.h>
83 #include <sys/modctl.h>
84 #include <sys/syscall.h>
85 #include <sys/policy.h>
86 #include <sys/zone.h>
87 
88 #include <c2/audit.h>
89 
/*
 * The following tunables are obsolete.  Though for compatibility we
 * still read and interpret msginfo_msgmnb, msginfo_msgmni, and
 * msginfo_msgtql (see os/project.c and os/rctl_proc.c), the preferred
 * mechanism for administering the IPC Message facility is through the
 * resource controls described at the top of this file.
 *
 * None of these variables is referenced by the code in this file; they
 * are defined here only so that the symbols continue to exist.
 */
size_t	msginfo_msgmax = 2048;	/* (obsolete) */
size_t	msginfo_msgmnb = 4096;	/* (obsolete) */
int	msginfo_msgmni = 50;	/* (obsolete) */
int	msginfo_msgtql = 40;	/* (obsolete) */
int	msginfo_msgssz = 8;	/* (obsolete) */
int	msginfo_msgmap = 0;	/* (obsolete) */
ushort_t msginfo_msgseg = 1024;	/* (obsolete) */
104 
/*
 * Resource control handles for the three controls described at the top
 * of this file.  They are defined outside this file.
 */
extern rctl_hndl_t rc_project_msgmni;
extern rctl_hndl_t rc_process_msgmnb;
extern rctl_hndl_t rc_process_msgtql;
/* IPC service handle for message queues; created by ipcs_create() in _init(). */
static ipc_service_t *msq_svc;
/* Zone key registered by zone_key_create() in _init(). */
static zone_key_t msg_zone_key;

/*
 * Callbacks: msg_dtor() and msg_rmid() are handed to ipcs_create(), and
 * msg_remove_zone() to zone_key_create(), in _init().
 */
static void msg_dtor(kipc_perm_t *);
static void msg_rmid(kipc_perm_t *);
static void msg_remove_zone(zoneid_t, void *);
114 
/*
 * Module linkage information for the kernel.
 *
 * The message facility is installed as a system call module: one
 * sysent/modlsys pair for native callers and, when _SYSCALL32_IMPL is
 * defined, a second pair for 32-bit callers.  Both are tied together
 * by the modlinkage structure that _init() passes to mod_install().
 */

/*
 * Native system call entry point, defined at the end of this file.
 * (The parameter names in this prototype differ from those used in the
 * definition; only the types matter here.)
 */
static ssize_t msgsys(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2,
	uintptr_t a4, uintptr_t a5);

/*
 * System call table entry for native callers.  The leading 6 matches
 * the six parameters of msgsys(); the return-value flag is chosen by
 * data model (SE_64RVAL under _LP64, SE_32RVAL1 otherwise).
 * SE_NOUNLOAD is set, consistent with _fini() always refusing to unload.
 */
static struct sysent ipcmsg_sysent = {
	6,
#ifdef	_LP64
	SE_ARGC | SE_NOUNLOAD | SE_64RVAL,
#else
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
#endif
	(int (*)())msgsys
};

#ifdef	_SYSCALL32_IMPL
/* 32-bit system call entry point, defined at the end of this file. */
static ssize32_t msgsys32(int opcode, uint32_t a0, uint32_t a1, uint32_t a2,
	uint32_t a4, uint32_t a5);

/* System call table entry for 32-bit callers. */
static struct sysent ipcmsg_sysent32 = {
	6,
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
	(int (*)())msgsys32
};
#endif	/* _SYSCALL32_IMPL */

/* Module description for the native system call. */
static struct modlsys modlsys = {
	&mod_syscallops, "System V message facility", &ipcmsg_sysent
};

#ifdef _SYSCALL32_IMPL
/* Module description for the 32-bit system call. */
static struct modlsys modlsys32 = {
	&mod_syscallops32, "32-bit System V message facility", &ipcmsg_sysent32
};
#endif

/* NULL-terminated list of the linkage structures above. */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
160 
161 
162 int
163 _init(void)
164 {
165 	int result;
166 
167 	msq_svc = ipcs_create("msqids", rc_project_msgmni, sizeof (kmsqid_t),
168 	    msg_dtor, msg_rmid, AT_IPC_MSG,
169 	    offsetof(kproject_data_t, kpd_msgmni));
170 	zone_key_create(&msg_zone_key, NULL, msg_remove_zone, NULL);
171 
172 	if ((result = mod_install(&modlinkage)) == 0)
173 		return (0);
174 
175 	(void) zone_key_delete(msg_zone_key);
176 	ipcs_destroy(msq_svc);
177 
178 	return (result);
179 }
180 
/*
 * _fini - module unload entry point.
 *
 * Unloading is never permitted: this always returns EBUSY.  That is
 * consistent with the SE_NOUNLOAD flag set in the sysent entries above.
 */
int
_fini(void)
{
	return (EBUSY);
}
186 
187 int
188 _info(struct modinfo *modinfop)
189 {
190 	return (mod_info(&modlinkage, modinfop));
191 }
192 
193 static void
194 msg_dtor(kipc_perm_t *perm)
195 {
196 	kmsqid_t *qp = (kmsqid_t *)perm;
197 	int		ii;
198 
199 	for (ii = 0; ii < MAX_QNUM_CV; ii++)
200 		ASSERT(qp->msg_rcv_cnt[ii] == 0);
201 	ASSERT(qp->msg_snd_cnt == 0);
202 	ASSERT(qp->msg_cbytes == 0);
203 	list_destroy(&qp->msg_list);
204 }
205 
206 
/*
 * msg_hold - take an additional reference on a message by incrementing
 * its msg_copycnt.  Each hold is dropped again with msg_rele().
 *
 * The expansion is wrapped in parentheses so that the macro always
 * behaves as one self-contained expression, whatever the surrounding
 * context.
 */
#define	msg_hold(mp)	((mp)->msg_copycnt++)
208 
209 /*
210  * msg_rele - decrement the reference count on the message.  When count
211  * reaches zero, free message header and contents.
212  */
213 static void
214 msg_rele(struct msg *mp)
215 {
216 	ASSERT(mp->msg_copycnt > 0);
217 	if (mp->msg_copycnt-- == 1) {
218 		if (mp->msg_addr)
219 			kmem_free(mp->msg_addr, mp->msg_size);
220 		kmem_free(mp, sizeof (struct msg));
221 	}
222 }
223 
224 /*
225  * msgunlink - Unlink msg from queue, decrement byte count and wake up anyone
226  * waiting for free bytes on queue.
227  *
228  * Called with queue locked.
229  */
230 static void
231 msgunlink(kmsqid_t *qp, struct msg *mp)
232 {
233 	list_remove(&qp->msg_list, mp);
234 	qp->msg_qnum--;
235 	qp->msg_cbytes -= mp->msg_size;
236 	msg_rele(mp);
237 
238 	/* Wake up waiting writers */
239 	if (qp->msg_snd_cnt)
240 		cv_broadcast(&qp->msg_snd_cv);
241 }
242 
243 static void
244 msg_rmid(kipc_perm_t *perm)
245 {
246 	kmsqid_t *qp = (kmsqid_t *)perm;
247 	struct msg *mp;
248 	int		ii;
249 
250 
251 	while ((mp = list_head(&qp->msg_list)) != NULL)
252 		msgunlink(qp, mp);
253 	ASSERT(qp->msg_cbytes == 0);
254 
255 	for (ii = 0; ii < MAX_QNUM_CV; ii++) {
256 		if (qp->msg_rcv_cnt[ii])
257 			cv_broadcast(&qp->msg_rcv_cv[ii]);
258 	}
259 	if (qp->msg_snd_cnt)
260 		cv_broadcast(&qp->msg_snd_cv);
261 }
262 
263 /*
264  * msgctl system call.
265  *
266  * gets q lock (via ipc_lookup), releases before return.
267  * may call users of msg_lock
268  */
269 static int
270 msgctl(int msgid, int cmd, void *arg)
271 {
272 	STRUCT_DECL(msqid_ds, ds);		/* SVR4 queue work area */
273 	kmsqid_t		*qp;		/* ptr to associated q */
274 	int			error, ii;
275 	struct	cred		*cr;
276 	model_t	mdl = get_udatamodel();
277 	struct msqid_ds64	ds64;
278 	kmutex_t		*lock;
279 	proc_t			*pp = curproc;
280 
281 	STRUCT_INIT(ds, mdl);
282 	cr = CRED();
283 
284 	/*
285 	 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
286 	 */
287 	switch (cmd) {
288 	case IPC_SET:
289 		if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds)))
290 			return (set_errno(EFAULT));
291 		break;
292 
293 	case IPC_SET64:
294 		if (copyin(arg, &ds64, sizeof (struct msqid_ds64)))
295 			return (set_errno(EFAULT));
296 		break;
297 
298 	case IPC_RMID:
299 		if (error = ipc_rmid(msq_svc, msgid, cr))
300 			return (set_errno(error));
301 		return (0);
302 	}
303 
304 	/*
305 	 * get msqid_ds for this msgid
306 	 */
307 	if ((lock = ipc_lookup(msq_svc, msgid, (kipc_perm_t **)&qp)) == NULL)
308 		return (set_errno(EINVAL));
309 
310 	switch (cmd) {
311 	case IPC_SET:
312 		if (STRUCT_FGET(ds, msg_qbytes) > qp->msg_qbytes &&
313 		    secpolicy_ipc_config(cr) != 0) {
314 			mutex_exit(lock);
315 			return (set_errno(EPERM));
316 		}
317 		if (error = ipcperm_set(msq_svc, cr, &qp->msg_perm,
318 		    &STRUCT_BUF(ds)->msg_perm, mdl)) {
319 			mutex_exit(lock);
320 			return (set_errno(error));
321 		}
322 		qp->msg_qbytes = STRUCT_FGET(ds, msg_qbytes);
323 		qp->msg_ctime = gethrestime_sec();
324 		break;
325 
326 	case IPC_STAT:
327 		if (error = ipcperm_access(&qp->msg_perm, MSG_R, cr)) {
328 			mutex_exit(lock);
329 			return (set_errno(error));
330 		}
331 
332 		for (ii = 0; ii < MAX_QNUM_CV; ii++) {
333 			if (qp->msg_rcv_cnt[ii]) {
334 				qp->msg_perm.ipc_mode |= MSG_RWAIT;
335 				break;
336 			}
337 		}
338 		if (qp->msg_snd_cnt)
339 			qp->msg_perm.ipc_mode |= MSG_WWAIT;
340 		ipcperm_stat(&STRUCT_BUF(ds)->msg_perm, &qp->msg_perm, mdl);
341 		qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT);
342 		STRUCT_FSETP(ds, msg_first, NULL); 	/* kernel addr */
343 		STRUCT_FSETP(ds, msg_last, NULL);
344 		STRUCT_FSET(ds, msg_cbytes, qp->msg_cbytes);
345 		STRUCT_FSET(ds, msg_qnum, qp->msg_qnum);
346 		STRUCT_FSET(ds, msg_qbytes, qp->msg_qbytes);
347 		STRUCT_FSET(ds, msg_lspid, qp->msg_lspid);
348 		STRUCT_FSET(ds, msg_lrpid, qp->msg_lrpid);
349 		STRUCT_FSET(ds, msg_stime, qp->msg_stime);
350 		STRUCT_FSET(ds, msg_rtime, qp->msg_rtime);
351 		STRUCT_FSET(ds, msg_ctime, qp->msg_ctime);
352 		break;
353 
354 	case IPC_SET64:
355 		mutex_enter(&pp->p_lock);
356 		if ((ds64.msgx_qbytes > qp->msg_qbytes) &&
357 		    secpolicy_ipc_config(cr) != 0 &&
358 		    rctl_test(rc_process_msgmnb, pp->p_rctls, pp,
359 		    ds64.msgx_qbytes, RCA_SAFE) & RCT_DENY) {
360 			mutex_exit(&pp->p_lock);
361 			mutex_exit(lock);
362 			return (set_errno(EPERM));
363 		}
364 		mutex_exit(&pp->p_lock);
365 		if (error = ipcperm_set64(msq_svc, cr, &qp->msg_perm,
366 		    &ds64.msgx_perm)) {
367 			mutex_exit(lock);
368 			return (set_errno(error));
369 		}
370 		qp->msg_qbytes = ds64.msgx_qbytes;
371 		qp->msg_ctime = gethrestime_sec();
372 		break;
373 
374 	case IPC_STAT64:
375 		for (ii = 0; ii < MAX_QNUM_CV; ii++) {
376 			if (qp->msg_rcv_cnt[ii]) {
377 				qp->msg_perm.ipc_mode |= MSG_RWAIT;
378 				break;
379 			}
380 		}
381 		if (qp->msg_snd_cnt)
382 			qp->msg_perm.ipc_mode |= MSG_WWAIT;
383 		ipcperm_stat64(&ds64.msgx_perm, &qp->msg_perm);
384 		qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT);
385 		ds64.msgx_cbytes = qp->msg_cbytes;
386 		ds64.msgx_qnum = qp->msg_qnum;
387 		ds64.msgx_qbytes = qp->msg_qbytes;
388 		ds64.msgx_lspid = qp->msg_lspid;
389 		ds64.msgx_lrpid = qp->msg_lrpid;
390 		ds64.msgx_stime = qp->msg_stime;
391 		ds64.msgx_rtime = qp->msg_rtime;
392 		ds64.msgx_ctime = qp->msg_ctime;
393 		break;
394 
395 	default:
396 		mutex_exit(lock);
397 		return (set_errno(EINVAL));
398 	}
399 
400 	mutex_exit(lock);
401 
402 	/*
403 	 * Do copyout last (after releasing mutex).
404 	 */
405 	switch (cmd) {
406 	case IPC_STAT:
407 		if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds)))
408 			return (set_errno(EFAULT));
409 		break;
410 
411 	case IPC_STAT64:
412 		if (copyout(&ds64, arg, sizeof (struct msqid_ds64)))
413 			return (set_errno(EFAULT));
414 		break;
415 	}
416 
417 	return (0);
418 }
419 
/*
 * Remove all message queues associated with a given zone.  Called by
 * zone_shutdown when the zone is halted.
 *
 * This is the callback registered with zone_key_create() in _init().
 * The work is delegated to ipc_remove_zone() on the message queue
 * service; the second argument is not used.
 */
/*ARGSUSED1*/
static void
msg_remove_zone(zoneid_t zoneid, void *arg)
{
	ipc_remove_zone(msq_svc, zoneid);
}
430 
/*
 * msgget system call.
 *
 * Obtains the message queue for `key' through ipc_get() and returns
 * its id.  When the queue handed back still tests IPC_FREE it is
 * initialized here and then committed with ipc_commit_begin() /
 * ipc_commit_end().
 *
 * On failure the error is posted with set_errno() and that call's
 * result is returned.
 */
static int
msgget(key_t key, int msgflg)
{
	kmsqid_t	*qp;
	kmutex_t	*lock;
	int		id, error;
	int		ii;
	proc_t		*pp = curproc;

top:
	if (error = ipc_get(msq_svc, key, msgflg, (kipc_perm_t **)&qp, &lock))
		return (set_errno(error));

	if (IPC_FREE(&qp->msg_perm)) {
		/*
		 * The queue needs its initial state set up.  Both the
		 * returned lock and curproc's p_lock are released first,
		 * so ipc_get() evidently returns with both held in this
		 * case (NOTE(review): confirm against os/ipc.c).
		 */
		mutex_exit(lock);
		mutex_exit(&pp->p_lock);

		list_create(&qp->msg_list, sizeof (struct msg),
		    offsetof(struct msg, msg_node));
		qp->msg_qnum = 0;
		qp->msg_lspid = qp->msg_lrpid = 0;
		qp->msg_stime = qp->msg_rtime = 0;
		qp->msg_ctime = gethrestime_sec();
		for (ii = 0; ii < MAX_QNUM_CV; ii++)
			qp->msg_rcv_cnt[ii] = 0;
		qp->msg_snd_cnt = 0;

		/* EAGAIN from ipc_commit_begin() restarts the whole lookup. */
		if (error = ipc_commit_begin(msq_svc, key, msgflg,
		    (kipc_perm_t *)qp)) {
			if (error == EAGAIN)
				goto top;
			return (set_errno(error));
		}
		/*
		 * Per-queue limits are taken from the creating process's
		 * enforced resource control values (see the description at
		 * the top of this file).
		 */
		qp->msg_qbytes = rctl_enforced_value(rc_process_msgmnb,
		    pp->p_rctls, pp);
		qp->msg_qmax = rctl_enforced_value(rc_process_msgtql,
		    pp->p_rctls, pp);
		/* ipc_commit_end() hands back the lock released below. */
		lock = ipc_commit_end(msq_svc, &qp->msg_perm);
	}
#ifdef C2_AUDIT
	if (audit_active)
		audit_ipcget(AT_IPC_MSG, (void *)qp);
#endif
	/* Read the id while the queue is still locked. */
	id = qp->msg_perm.ipc_id;
	mutex_exit(lock);
	return (id);
}
481 
/*
 * msgrcv system call.
 *
 * Receives one message from queue msqid into the user buffer msgp.
 *
 * Message selection by msgtyp:
 *   msgtyp == 0  the first message on the queue;
 *   msgtyp  > 0  the first message whose type equals msgtyp;
 *   msgtyp  < 0  the message with the lowest type among those whose
 *                type is <= -msgtyp (the earliest one on a tie).
 *
 * msgsz is the size of the caller's text buffer.  A message longer
 * than msgsz fails with E2BIG unless MSG_NOERROR is set in msgflg, in
 * which case it is truncated to msgsz bytes.
 *
 * When no message matches, the call fails with ENOMSG if IPC_NOWAIT is
 * set and otherwise sleeps until woken.  Note that IPC_NOWAIT is only
 * consulted when nothing matched: if the selected message is currently
 * being copied out by another thread (MSG_RCVCOPY), this call sleeps
 * regardless of IPC_NOWAIT.
 *
 * Returns the number of bytes of message text transferred.  On failure
 * the error (EINVAL, E2BIG, ENOMSG, EIDRM, EINTR, EFAULT, or a
 * permission error from ipcperm_access()) is posted with set_errno()
 * and that call's result is returned.
 */
static ssize_t
msgrcv(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, long msgtyp, int msgflg)
{
	struct msg	*mp;	/* ptr to msg on q */
	struct msg	*smp;	/* ptr to best msg on q */
	kmsqid_t	*qp;	/* ptr to associated q */
	kmutex_t	*lock;
	size_t		xtsz;	/* transfer byte count */
	int		error = 0, copyerror = 0;
	int		cvres;
	STRUCT_HANDLE(ipcmsgbuf, umsgp);
	model_t		mdl = get_udatamodel();

	CPU_STATS_ADDQ(CPU, sys, msg, 1);	/* bump msg send/rcv count */
	STRUCT_SET_HANDLE(umsgp, mdl, msgp);

	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL)
		return ((ssize_t)set_errno(EINVAL));
	/* Held until the ipc_rele() at msgrcv_out. */
	ipc_hold(msq_svc, (kipc_perm_t *)qp);

	if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED()))
		goto msgrcv_out;

findmsg:
	/* Select a candidate message according to msgtyp (see above). */
	smp = NULL;
	mp = list_head(&qp->msg_list);
	if (msgtyp == 0) {
		smp = mp;
	} else {
		for (; mp; mp = list_next(&qp->msg_list, mp)) {
			if (msgtyp > 0) {
				if (msgtyp != mp->msg_type)
					continue;
				smp = mp;
				break;
			}
			/* msgtyp < 0: keep the lowest qualifying type. */
			if (mp->msg_type <= -msgtyp) {
				if (smp && smp->msg_type <= mp->msg_type)
					continue;
				smp = mp;
			}
		}
	}

	if (smp) {
		/*
		 * Message found.
		 */
		if ((smp->msg_flags & MSG_RCVCOPY) == 0) {
			/*
			 * No one else is copying this message. Copy it.
			 */
			if (msgsz < smp->msg_size) {
				if ((msgflg & MSG_NOERROR) == 0) {
					error = E2BIG;
					goto msgrcv_out;
				} else {
					xtsz = msgsz;
				}
			} else {
				xtsz = smp->msg_size;
			}

			/*
			 * Mark message as being copied out. Release mutex
			 * while copying out.  The extra hold keeps the
			 * message from being freed while the lock is
			 * dropped.
			 */
			ASSERT((smp->msg_flags & MSG_RCVCOPY) == 0);
			smp->msg_flags |= MSG_RCVCOPY;
			msg_hold(smp);
			mutex_exit(lock);

			if (mdl == DATAMODEL_NATIVE) {
				copyerror = copyout(&smp->msg_type, msgp,
				    sizeof (smp->msg_type));
			} else {
				/*
				 * 32-bit callers need an imploded msg type.
				 */
				int32_t	msg_type32 = smp->msg_type;

				copyerror = copyout(&msg_type32, msgp,
				    sizeof (msg_type32));
			}

			/* Copy the text only if the type went out cleanly. */
			if (copyerror == 0 && xtsz)
				copyerror = copyout(smp->msg_addr,
				    STRUCT_FADDR(umsgp, mtext), xtsz);

			/*
			 * Reclaim mutex, make sure queue still exists,
			 * and remove message.
			 */
			lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
			ASSERT(smp->msg_flags & MSG_RCVCOPY);
			smp->msg_flags &= ~MSG_RCVCOPY;
			msg_rele(smp);

			/* smp is not touched again on this path. */
			if (IPC_FREE(&qp->msg_perm)) {
				error = EIDRM;
				goto msgrcv_out;
			}
			/*
			 * MSG_RCVCOPY was set while we dropped and reacquired
			 * the lock. A thread looking for same message type
			 * might have entered during that interval and seeing
			 * MSG_RCVCOPY set, would have landed up in the sleepq.
			 */
			cv_broadcast(&qp->msg_rcv_cv[MSG_QNUM(smp->msg_type)]);
			cv_broadcast(&qp->msg_rcv_cv[0]);

			/* A failed copyout leaves the message on the queue. */
			if (copyerror) {
				error = EFAULT;
				goto msgrcv_out;
			}
			qp->msg_lrpid = ttoproc(curthread)->p_pid;
			qp->msg_rtime = gethrestime_sec();
			msgunlink(qp, smp);
			goto msgrcv_out;
		}
		/*
		 * Another thread is copying this message out; fall through
		 * to the wait below.
		 */

	} else {
		/*
		 * No message found.
		 */
		if (msgflg & IPC_NOWAIT) {
			error = ENOMSG;
			goto msgrcv_out;
		}
	}

	/*
	 * Wait for new message.  The receiver is counted on the slot
	 * selected by MSG_QNUM(msgtyp) for the duration of the sleep.
	 */
	qp->msg_rcv_cnt[MSG_QNUM(msgtyp)]++;
	cvres = cv_wait_sig(&qp->msg_rcv_cv[MSG_QNUM(msgtyp)], lock);
	lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, lock);
	qp->msg_rcv_cnt[MSG_QNUM(msgtyp)]--;

	/* Queue removal is reported in preference to an interrupted wait. */
	if (IPC_FREE(&qp->msg_perm)) {
		error = EIDRM;
		goto msgrcv_out;
	}
	if (cvres == 0) {
		error = EINTR;
		goto msgrcv_out;
	}

	goto findmsg;

msgrcv_out:
	ipc_rele(msq_svc, (kipc_perm_t *)qp);
	if (error)
		return ((ssize_t)set_errno(error));
	/* xtsz was set on the only path that arrives here without error. */
	return ((ssize_t)xtsz);
}
639 
640 /*
641  * msgids system call.
642  */
643 static int
644 msgids(int *buf, uint_t nids, uint_t *pnids)
645 {
646 	int error;
647 
648 	if (error = ipc_ids(msq_svc, buf, nids, pnids))
649 		return (set_errno(error));
650 
651 	return (0);
652 }
653 
/*
 * Round a byte count up to a multiple of sizeof (size_t) (RND) or of
 * sizeof (size32_t) (RND32).  msgsnap() uses these to size and step
 * over each message's text in the snapshot buffer, choosing between
 * them by the caller's data model.
 */
#define	RND(x)		roundup((x), sizeof (size_t))
#define	RND32(x)	roundup((x), sizeof (size32_t))
656 
/*
 * msgsnap system call.
 *
 * Copies a snapshot of the messages on queue msqid that match msgtyp
 * into the user buffer buf (bufsz bytes) without removing them from
 * the queue.  Matching follows the same three cases as the rest of
 * this file: 0 matches every message, a positive value matches that
 * exact type, and a negative value matches types <= -msgtyp.
 *
 * Buffer layout: a msgsnap_head (total size required, message count),
 * then for each message a msgsnap_mhead (length, type) followed by the
 * message text, with the text area rounded up by RND()/RND32()
 * according to the caller's data model.
 *
 * If the buffer is too small for all matching messages, only the
 * header is written, with the required size and a message count of 0.
 *
 * Returns 0 on success.  On failure the error (EINVAL, EFAULT, EIDRM,
 * or a permission error from ipcperm_access()) is posted with
 * set_errno() and that call's result is returned.
 */
static int
msgsnap(int msqid, caddr_t buf, size_t bufsz, long msgtyp)
{
	struct msg	*mp;	/* ptr to msg on q */
	kmsqid_t	*qp;	/* ptr to associated q */
	kmutex_t	*lock;
	size_t		size;
	size_t		nmsg;
	struct msg	**snaplist;
	int		error, i;
	model_t		mdl = get_udatamodel();
	STRUCT_DECL(msgsnap_head, head);
	STRUCT_DECL(msgsnap_mhead, mhead);

	STRUCT_INIT(head, mdl);
	STRUCT_INIT(mhead, mdl);

	/* The buffer must at least be able to hold the header. */
	if (bufsz < STRUCT_SIZE(head))
		return (set_errno(EINVAL));

	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL)
		return (set_errno(EINVAL));

	/* On success this leaves error == 0 for the code below. */
	if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED())) {
		mutex_exit(lock);
		return (set_errno(error));
	}
	/* Held until the ipc_rele() near the end of the function. */
	ipc_hold(msq_svc, (kipc_perm_t *)qp);

	/*
	 * First compute the required buffer size and
	 * the number of messages on the queue.
	 */
	size = nmsg = 0;
	for (mp = list_head(&qp->msg_list); mp;
	    mp = list_next(&qp->msg_list, mp)) {
		if (msgtyp == 0 ||
		    (msgtyp > 0 && msgtyp == mp->msg_type) ||
		    (msgtyp < 0 && mp->msg_type <= -msgtyp)) {
			nmsg++;
			if (mdl == DATAMODEL_NATIVE)
				size += RND(mp->msg_size);
			else
				size += RND32(mp->msg_size);
		}
	}

	/*
	 * Add the fixed overhead.  If the total does not fit, report the
	 * required size but copy no messages.
	 */
	size += STRUCT_SIZE(head) + nmsg * STRUCT_SIZE(mhead);
	if (size > bufsz)
		nmsg = 0;

	if (nmsg > 0) {
		/*
		 * Mark the messages as being copied.  The lock is still
		 * held from the counting pass, so this second walk sees
		 * the same messages.  Each one gets a hold so that it
		 * stays valid after the lock is dropped.
		 */
		snaplist = (struct msg **)kmem_alloc(nmsg *
		    sizeof (struct msg *), KM_SLEEP);
		i = 0;
		for (mp = list_head(&qp->msg_list); mp;
		    mp = list_next(&qp->msg_list, mp)) {
			if (msgtyp == 0 ||
			    (msgtyp > 0 && msgtyp == mp->msg_type) ||
			    (msgtyp < 0 && mp->msg_type <= -msgtyp)) {
				msg_hold(mp);
				snaplist[i] = mp;
				i++;
			}
		}
	}
	mutex_exit(lock);

	/*
	 * Copy out the buffer header.
	 */
	STRUCT_FSET(head, msgsnap_size, size);
	STRUCT_FSET(head, msgsnap_nmsg, nmsg);
	if (copyout(STRUCT_BUF(head), buf, STRUCT_SIZE(head)))
		error = EFAULT;

	buf += STRUCT_SIZE(head);

	/*
	 * Now copy out the messages one by one.  Once an error has been
	 * recorded no further copying is attempted, but the loop still
	 * runs to the end so that every hold taken above is released.
	 */
	for (i = 0; i < nmsg; i++) {
		mp = snaplist[i];
		if (error == 0) {
			STRUCT_FSET(mhead, msgsnap_mlen, mp->msg_size);
			STRUCT_FSET(mhead, msgsnap_mtype, mp->msg_type);
			if (copyout(STRUCT_BUF(mhead), buf, STRUCT_SIZE(mhead)))
				error = EFAULT;
			buf += STRUCT_SIZE(mhead);

			if (error == 0 &&
			    mp->msg_size != 0 &&
			    copyout(mp->msg_addr, buf, mp->msg_size))
				error = EFAULT;
			/* Step over the text plus its rounding gap. */
			if (mdl == DATAMODEL_NATIVE)
				buf += RND(mp->msg_size);
			else
				buf += RND32(mp->msg_size);
		}
		/* The hold is released with the queue lock held. */
		lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
		msg_rele(mp);
		/* Check for msg q deleted or reallocated */
		if (IPC_FREE(&qp->msg_perm))
			error = EIDRM;
		mutex_exit(lock);
	}

	/*
	 * Retake the lock for ipc_rele(); the returned pointer is not
	 * needed because nothing below unlocks explicitly (msgsnd() notes
	 * that ipc_rele() drops the lock).
	 */
	(void) ipc_lock(msq_svc, qp->msg_perm.ipc_id);
	ipc_rele(msq_svc, (kipc_perm_t *)qp);

	if (nmsg > 0)
		kmem_free(snaplist, nmsg * sizeof (struct msg *));

	if (error)
		return (set_errno(error));
	return (0);
}
780 
781 /*
782  * msgsnd system call.
783  */
784 static int
785 msgsnd(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, int msgflg)
786 {
787 	kmsqid_t	*qp;
788 	kmutex_t	*lock;
789 	struct msg	*mp = NULL;
790 	long		type;
791 	int		error = 0;
792 	model_t		mdl = get_udatamodel();
793 	STRUCT_HANDLE(ipcmsgbuf, umsgp);
794 
795 	CPU_STATS_ADDQ(CPU, sys, msg, 1);	/* bump msg send/rcv count */
796 	STRUCT_SET_HANDLE(umsgp, mdl, msgp);
797 
798 	if (mdl == DATAMODEL_NATIVE) {
799 		if (copyin(msgp, &type, sizeof (type)))
800 			return (set_errno(EFAULT));
801 	} else {
802 		int32_t	type32;
803 		if (copyin(msgp, &type32, sizeof (type32)))
804 			return (set_errno(EFAULT));
805 		type = type32;
806 	}
807 
808 	if (type < 1)
809 		return (set_errno(EINVAL));
810 
811 	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL)
812 		return (set_errno(EINVAL));
813 	ipc_hold(msq_svc, (kipc_perm_t *)qp);
814 
815 	if (msgsz > qp->msg_qbytes) {
816 		error = EINVAL;
817 		goto msgsnd_out;
818 	}
819 
820 	if (error = ipcperm_access(&qp->msg_perm, MSG_W, CRED()))
821 		goto msgsnd_out;
822 
823 top:
824 	/*
825 	 * Allocate space on q, message header, & buffer space.
826 	 */
827 	ASSERT(qp->msg_qnum <= qp->msg_qmax);
828 	while ((msgsz > qp->msg_qbytes - qp->msg_cbytes) ||
829 	    (qp->msg_qnum == qp->msg_qmax)) {
830 		int cvres;
831 
832 		if (msgflg & IPC_NOWAIT) {
833 			error = EAGAIN;
834 			goto msgsnd_out;
835 		}
836 
837 		qp->msg_snd_cnt++;
838 		cvres = cv_wait_sig(&qp->msg_snd_cv, lock);
839 		lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, lock);
840 		qp->msg_snd_cnt--;
841 
842 		if (IPC_FREE(&qp->msg_perm)) {
843 			error = EIDRM;
844 			goto msgsnd_out;
845 		}
846 
847 		if (cvres == 0) {
848 			error = EINTR;
849 			goto msgsnd_out;
850 		}
851 	}
852 
853 	if (mp == NULL) {
854 		int failure;
855 
856 		mutex_exit(lock);
857 		mp = kmem_zalloc(sizeof (struct msg), KM_SLEEP);
858 		mp->msg_addr = kmem_zalloc(msgsz, KM_SLEEP);
859 		mp->msg_size = msgsz;
860 		mp->msg_copycnt = 1;
861 
862 		failure = msgsz && (copyin(STRUCT_FADDR(umsgp, mtext),
863 		    mp->msg_addr, msgsz) == -1);
864 		lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
865 		if (IPC_FREE(&qp->msg_perm)) {
866 			error = EIDRM;
867 			goto msgsnd_out;
868 		}
869 		if (failure) {
870 			error = EFAULT;
871 			goto msgsnd_out;
872 		}
873 		goto top;
874 	}
875 
876 	/*
877 	 * Everything is available, put msg on q.
878 	 */
879 	qp->msg_qnum++;
880 	qp->msg_cbytes += msgsz;
881 	qp->msg_lspid = curproc->p_pid;
882 	qp->msg_stime = gethrestime_sec();
883 	mp->msg_type = type;
884 	mp->msg_flags = 0;
885 	list_insert_tail(&qp->msg_list, mp);
886 	/*
887 	 * For all message type >= 1.
888 	 */
889 	if (qp->msg_rcv_cnt[MSG_QNUM(type)])
890 		cv_broadcast(&qp->msg_rcv_cv[MSG_QNUM(type)]);
891 	/*
892 	 * For all message type < 1.
893 	 */
894 	if (qp->msg_rcv_cnt[0])
895 		cv_broadcast(&qp->msg_rcv_cv[0]);
896 
897 msgsnd_out:
898 	ipc_rele(msq_svc, (kipc_perm_t *)qp);	/* drops lock */
899 
900 	if (error) {
901 		if (mp)
902 			msg_rele(mp);
903 		return (set_errno(error));
904 	}
905 
906 	return (0);
907 }
908 
909 /*
910  * msgsys - System entry point for msgctl, msgget, msgrcv, and msgsnd
911  * system calls.
912  */
913 static ssize_t
914 msgsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3,
915 	uintptr_t a4, uintptr_t a5)
916 {
917 	ssize_t error;
918 
919 	switch (opcode) {
920 	case MSGGET:
921 		error = msgget((key_t)a1, (int)a2);
922 		break;
923 	case MSGCTL:
924 		error = msgctl((int)a1, (int)a2, (void *)a3);
925 		break;
926 	case MSGRCV:
927 		error = msgrcv((int)a1, (struct ipcmsgbuf *)a2,
928 		    (size_t)a3, (long)a4, (int)a5);
929 		break;
930 	case MSGSND:
931 		error = msgsnd((int)a1, (struct ipcmsgbuf *)a2,
932 		    (size_t)a3, (int)a4);
933 		break;
934 	case MSGIDS:
935 		error = msgids((int *)a1, (uint_t)a2, (uint_t *)a3);
936 		break;
937 	case MSGSNAP:
938 		error = msgsnap((int)a1, (caddr_t)a2, (size_t)a3, (long)a4);
939 		break;
940 	default:
941 		error = set_errno(EINVAL);
942 		break;
943 	}
944 
945 	return (error);
946 }
947 
#ifdef	_SYSCALL32_IMPL
/*
 * msgsys32 - System entry point for the message facility for 32-bit
 * callers on LP64 kernel.
 *
 * Same dispatch as msgsys(), with 32-bit arguments widened for the
 * shared handlers: user addresses go through uintptr_t, the message
 * type arguments are sign-extended through int32_t, and so is the
 * msgsnd size before it is converted to size_t.
 */
static ssize32_t
msgsys32(int opcode, uint32_t a1, uint32_t a2, uint32_t a3,
	uint32_t a4, uint32_t a5)
{
	switch (opcode) {
	case MSGGET:
		return (msgget((key_t)a1, (int)a2));

	case MSGCTL:
		return (msgctl((int)a1, (int)a2, (void *)(uintptr_t)a3));

	case MSGRCV:
		return (msgrcv((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2,
		    (size_t)a3, (long)(int32_t)a4, (int)a5));

	case MSGSND:
		return (msgsnd((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2,
		    (size_t)(int32_t)a3, (int)a4));

	case MSGIDS:
		return (msgids((int *)(uintptr_t)a1, (uint_t)a2,
		    (uint_t *)(uintptr_t)a3));

	case MSGSNAP:
		return (msgsnap((int)a1, (caddr_t)(uintptr_t)a2, (size_t)a3,
		    (long)(int32_t)a4));

	default:
		break;
	}

	/* Unknown opcode. */
	return (set_errno(EINVAL));
}
#endif	/* _SYSCALL32_IMPL */
990