xref: /titanic_51/usr/src/uts/common/os/msg.c (revision 261a51afbf7133d9f7c89f1388050677f56b7d1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  * Inter-Process Communication Message Facility.
34  *
35  * See os/ipc.c for a description of common IPC functionality.
36  *
37  * Resource controls
38  * -----------------
39  *
40  * Control:      zone.max-msg-ids (rc_zone_msgmni)
41  * Description:  Maximum number of message queue ids allowed a zone.
42  *
43  *   When msgget() is used to allocate a message queue, one id is
44  *   allocated.  If the id allocation doesn't succeed, msgget() fails
45  *   and errno is set to ENOSPC.  Upon successful msgctl(, IPC_RMID)
46  *   the id is deallocated.
47  *
48  * Control:      project.max-msg-ids (rc_project_msgmni)
49  * Description:  Maximum number of message queue ids allowed a project.
50  *
51  *   When msgget() is used to allocate a message queue, one id is
52  *   allocated.  If the id allocation doesn't succeed, msgget() fails
53  *   and errno is set to ENOSPC.  Upon successful msgctl(, IPC_RMID)
54  *   the id is deallocated.
55  *
56  * Control:      process.max-msg-qbytes (rc_process_msgmnb)
57  * Description:  Maximum number of bytes of messages on a message queue.
58  *
59  *   When msgget() successfully allocates a message queue, the minimum
60  *   enforced value of this limit is used to initialize msg_qbytes.
61  *
62  * Control:      process.max-msg-messages (rc_process_msgtql)
63  * Description:  Maximum number of messages on a message queue.
64  *
65  *   When msgget() successfully allocates a message queue, the minimum
66  *   enforced value of this limit is used to initialize a per-queue
67  *   limit on the number of messages.
68  */
69 
70 #include <sys/types.h>
71 #include <sys/t_lock.h>
72 #include <sys/param.h>
73 #include <sys/cred.h>
74 #include <sys/user.h>
75 #include <sys/proc.h>
76 #include <sys/time.h>
77 #include <sys/ipc.h>
78 #include <sys/ipc_impl.h>
79 #include <sys/msg.h>
80 #include <sys/msg_impl.h>
81 #include <sys/list.h>
82 #include <sys/systm.h>
83 #include <sys/sysmacros.h>
84 #include <sys/cpuvar.h>
85 #include <sys/kmem.h>
86 #include <sys/ddi.h>
87 #include <sys/errno.h>
88 #include <sys/cmn_err.h>
89 #include <sys/debug.h>
90 #include <sys/project.h>
91 #include <sys/modctl.h>
92 #include <sys/syscall.h>
93 #include <sys/policy.h>
94 #include <sys/zone.h>
95 
96 #include <c2/audit.h>
97 
/*
 * The following tunables are obsolete.  Though for compatibility we
 * still read and interpret msginfo_msgmnb, msginfo_msgmni, and
 * msginfo_msgtql (see os/project.c and os/rctl_proc.c), the preferred
 * mechanism for administering the IPC Message facility is through the
 * resource controls described at the top of this file.
 *
 * The values below are only the compiled-in defaults; nothing in this
 * file reads them.
 */
size_t	msginfo_msgmax = 2048;	/* (obsolete) */
size_t	msginfo_msgmnb = 4096;	/* (obsolete) */
int	msginfo_msgmni = 50;	/* (obsolete) */
int	msginfo_msgtql = 40;	/* (obsolete) */
int	msginfo_msgssz = 8;	/* (obsolete) */
int	msginfo_msgmap = 0;	/* (obsolete) */
ushort_t msginfo_msgseg = 1024;	/* (obsolete) */
112 
/*
 * Resource control handles, defined outside this file.  They correspond
 * to the controls described in the block comment at the top of the file.
 */
extern rctl_hndl_t rc_zone_msgmni;
extern rctl_hndl_t rc_project_msgmni;
extern rctl_hndl_t rc_process_msgmnb;
extern rctl_hndl_t rc_process_msgtql;
/* IPC service handle for message queues; created by ipcs_create() in _init() */
static ipc_service_t *msq_svc;
/* Zone key created in _init(); registers msg_remove_zone() as a callback */
static zone_key_t msg_zone_key;

/* Callbacks handed to ipcs_create() and zone_key_create() in _init() */
static void msg_dtor(kipc_perm_t *);
static void msg_rmid(kipc_perm_t *);
static void msg_remove_zone(zoneid_t, void *);
123 
124 /*
125  * Module linkage information for the kernel.
126  */
127 static ssize_t msgsys(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2,
128 	uintptr_t a4, uintptr_t a5);
129 
130 static struct sysent ipcmsg_sysent = {
131 	6,
132 #ifdef	_LP64
133 	SE_ARGC | SE_NOUNLOAD | SE_64RVAL,
134 #else
135 	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
136 #endif
137 	(int (*)())msgsys
138 };
139 
#ifdef	_SYSCALL32_IMPL
/*
 * Entry point for 32-bit callers; the parameter names match the
 * definition of msgsys32().
 */
static ssize32_t msgsys32(int opcode, uint32_t a1, uint32_t a2, uint32_t a3,
	uint32_t a4, uint32_t a5);

/*
 * System call table entry used for 32-bit callers.  The leading 6 is
 * presumably the argument count (opcode plus five operands) -- confirm
 * against the struct sysent declaration.
 */
static struct sysent ipcmsg_sysent32 = {
	6,
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
	(int (*)())msgsys32
};
#endif	/* _SYSCALL32_IMPL */
150 
/*
 * Module linkage for the native system call: binds the description
 * string and ipcmsg_sysent to the mod_syscallops operations.
 */
static struct modlsys modlsys = {
	&mod_syscallops, "System V message facility", &ipcmsg_sysent
};

#ifdef _SYSCALL32_IMPL
/*
 * Same linkage for the 32-bit entry point, built only when
 * _SYSCALL32_IMPL is defined.
 */
static struct modlsys modlsys32 = {
	&mod_syscallops32, "32-bit System V message facility", &ipcmsg_sysent32
};
#endif
160 
/*
 * NULL-terminated list of the linkage structures above.  This is what
 * _init() passes to mod_install() and _info() passes to mod_info().
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
169 
170 
171 int
172 _init(void)
173 {
174 	int result;
175 
176 	msq_svc = ipcs_create("msqids", rc_project_msgmni, rc_zone_msgmni,
177 	    sizeof (kmsqid_t), msg_dtor, msg_rmid, AT_IPC_MSG,
178 	    offsetof(ipc_rqty_t, ipcq_msgmni));
179 	zone_key_create(&msg_zone_key, NULL, msg_remove_zone, NULL);
180 
181 	if ((result = mod_install(&modlinkage)) == 0)
182 		return (0);
183 
184 	(void) zone_key_delete(msg_zone_key);
185 	ipcs_destroy(msq_svc);
186 
187 	return (result);
188 }
189 
/*
 * _fini - module unload entry point.
 *
 * Unloading is never permitted: the request is always refused with
 * EBUSY.  (Both sysent entries in this file also carry SE_NOUNLOAD.)
 */
int
_fini(void)
{
	const int refused = EBUSY;

	return (refused);
}
195 
196 int
197 _info(struct modinfo *modinfop)
198 {
199 	return (mod_info(&modlinkage, modinfop));
200 }
201 
202 static void
203 msg_dtor(kipc_perm_t *perm)
204 {
205 	kmsqid_t *qp = (kmsqid_t *)perm;
206 	int		ii;
207 
208 	for (ii = 0; ii < MAX_QNUM_CV; ii++)
209 		ASSERT(qp->msg_rcv_cnt[ii] == 0);
210 	ASSERT(qp->msg_snd_cnt == 0);
211 	ASSERT(qp->msg_cbytes == 0);
212 	list_destroy(&qp->msg_list);
213 }
214 
215 
/* msg_hold - take one more reference on a message; undone by msg_rele(). */
#define	msg_hold(mp)	((mp)->msg_copycnt++)
217 
218 /*
219  * msg_rele - decrement the reference count on the message.  When count
220  * reaches zero, free message header and contents.
221  */
222 static void
223 msg_rele(struct msg *mp)
224 {
225 	ASSERT(mp->msg_copycnt > 0);
226 	if (mp->msg_copycnt-- == 1) {
227 		if (mp->msg_addr)
228 			kmem_free(mp->msg_addr, mp->msg_size);
229 		kmem_free(mp, sizeof (struct msg));
230 	}
231 }
232 
233 /*
234  * msgunlink - Unlink msg from queue, decrement byte count and wake up anyone
235  * waiting for free bytes on queue.
236  *
237  * Called with queue locked.
238  */
239 static void
240 msgunlink(kmsqid_t *qp, struct msg *mp)
241 {
242 	list_remove(&qp->msg_list, mp);
243 	qp->msg_qnum--;
244 	qp->msg_cbytes -= mp->msg_size;
245 	msg_rele(mp);
246 
247 	/* Wake up waiting writers */
248 	if (qp->msg_snd_cnt)
249 		cv_broadcast(&qp->msg_snd_cv);
250 }
251 
252 static void
253 msg_rmid(kipc_perm_t *perm)
254 {
255 	kmsqid_t *qp = (kmsqid_t *)perm;
256 	struct msg *mp;
257 	int		ii;
258 
259 
260 	while ((mp = list_head(&qp->msg_list)) != NULL)
261 		msgunlink(qp, mp);
262 	ASSERT(qp->msg_cbytes == 0);
263 
264 	for (ii = 0; ii < MAX_QNUM_CV; ii++) {
265 		if (qp->msg_rcv_cnt[ii])
266 			cv_broadcast(&qp->msg_rcv_cv[ii]);
267 	}
268 	if (qp->msg_snd_cnt)
269 		cv_broadcast(&qp->msg_snd_cv);
270 }
271 
272 /*
273  * msgctl system call.
274  *
275  * gets q lock (via ipc_lookup), releases before return.
276  * may call users of msg_lock
277  */
278 static int
279 msgctl(int msgid, int cmd, void *arg)
280 {
281 	STRUCT_DECL(msqid_ds, ds);		/* SVR4 queue work area */
282 	kmsqid_t		*qp;		/* ptr to associated q */
283 	int			error, ii;
284 	struct	cred		*cr;
285 	model_t	mdl = get_udatamodel();
286 	struct msqid_ds64	ds64;
287 	kmutex_t		*lock;
288 	proc_t			*pp = curproc;
289 
290 	STRUCT_INIT(ds, mdl);
291 	cr = CRED();
292 
293 	/*
294 	 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
295 	 */
296 	switch (cmd) {
297 	case IPC_SET:
298 		if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds)))
299 			return (set_errno(EFAULT));
300 		break;
301 
302 	case IPC_SET64:
303 		if (copyin(arg, &ds64, sizeof (struct msqid_ds64)))
304 			return (set_errno(EFAULT));
305 		break;
306 
307 	case IPC_RMID:
308 		if (error = ipc_rmid(msq_svc, msgid, cr))
309 			return (set_errno(error));
310 		return (0);
311 	}
312 
313 	/*
314 	 * get msqid_ds for this msgid
315 	 */
316 	if ((lock = ipc_lookup(msq_svc, msgid, (kipc_perm_t **)&qp)) == NULL)
317 		return (set_errno(EINVAL));
318 
319 	switch (cmd) {
320 	case IPC_SET:
321 		if (STRUCT_FGET(ds, msg_qbytes) > qp->msg_qbytes &&
322 		    secpolicy_ipc_config(cr) != 0) {
323 			mutex_exit(lock);
324 			return (set_errno(EPERM));
325 		}
326 		if (error = ipcperm_set(msq_svc, cr, &qp->msg_perm,
327 		    &STRUCT_BUF(ds)->msg_perm, mdl)) {
328 			mutex_exit(lock);
329 			return (set_errno(error));
330 		}
331 		qp->msg_qbytes = STRUCT_FGET(ds, msg_qbytes);
332 		qp->msg_ctime = gethrestime_sec();
333 		break;
334 
335 	case IPC_STAT:
336 		if (error = ipcperm_access(&qp->msg_perm, MSG_R, cr)) {
337 			mutex_exit(lock);
338 			return (set_errno(error));
339 		}
340 
341 		for (ii = 0; ii < MAX_QNUM_CV; ii++) {
342 			if (qp->msg_rcv_cnt[ii]) {
343 				qp->msg_perm.ipc_mode |= MSG_RWAIT;
344 				break;
345 			}
346 		}
347 		if (qp->msg_snd_cnt)
348 			qp->msg_perm.ipc_mode |= MSG_WWAIT;
349 		ipcperm_stat(&STRUCT_BUF(ds)->msg_perm, &qp->msg_perm, mdl);
350 		qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT);
351 		STRUCT_FSETP(ds, msg_first, NULL); 	/* kernel addr */
352 		STRUCT_FSETP(ds, msg_last, NULL);
353 		STRUCT_FSET(ds, msg_cbytes, qp->msg_cbytes);
354 		STRUCT_FSET(ds, msg_qnum, qp->msg_qnum);
355 		STRUCT_FSET(ds, msg_qbytes, qp->msg_qbytes);
356 		STRUCT_FSET(ds, msg_lspid, qp->msg_lspid);
357 		STRUCT_FSET(ds, msg_lrpid, qp->msg_lrpid);
358 		STRUCT_FSET(ds, msg_stime, qp->msg_stime);
359 		STRUCT_FSET(ds, msg_rtime, qp->msg_rtime);
360 		STRUCT_FSET(ds, msg_ctime, qp->msg_ctime);
361 		break;
362 
363 	case IPC_SET64:
364 		mutex_enter(&pp->p_lock);
365 		if ((ds64.msgx_qbytes > qp->msg_qbytes) &&
366 		    secpolicy_ipc_config(cr) != 0 &&
367 		    rctl_test(rc_process_msgmnb, pp->p_rctls, pp,
368 		    ds64.msgx_qbytes, RCA_SAFE) & RCT_DENY) {
369 			mutex_exit(&pp->p_lock);
370 			mutex_exit(lock);
371 			return (set_errno(EPERM));
372 		}
373 		mutex_exit(&pp->p_lock);
374 		if (error = ipcperm_set64(msq_svc, cr, &qp->msg_perm,
375 		    &ds64.msgx_perm)) {
376 			mutex_exit(lock);
377 			return (set_errno(error));
378 		}
379 		qp->msg_qbytes = ds64.msgx_qbytes;
380 		qp->msg_ctime = gethrestime_sec();
381 		break;
382 
383 	case IPC_STAT64:
384 		for (ii = 0; ii < MAX_QNUM_CV; ii++) {
385 			if (qp->msg_rcv_cnt[ii]) {
386 				qp->msg_perm.ipc_mode |= MSG_RWAIT;
387 				break;
388 			}
389 		}
390 		if (qp->msg_snd_cnt)
391 			qp->msg_perm.ipc_mode |= MSG_WWAIT;
392 		ipcperm_stat64(&ds64.msgx_perm, &qp->msg_perm);
393 		qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT);
394 		ds64.msgx_cbytes = qp->msg_cbytes;
395 		ds64.msgx_qnum = qp->msg_qnum;
396 		ds64.msgx_qbytes = qp->msg_qbytes;
397 		ds64.msgx_lspid = qp->msg_lspid;
398 		ds64.msgx_lrpid = qp->msg_lrpid;
399 		ds64.msgx_stime = qp->msg_stime;
400 		ds64.msgx_rtime = qp->msg_rtime;
401 		ds64.msgx_ctime = qp->msg_ctime;
402 		break;
403 
404 	default:
405 		mutex_exit(lock);
406 		return (set_errno(EINVAL));
407 	}
408 
409 	mutex_exit(lock);
410 
411 	/*
412 	 * Do copyout last (after releasing mutex).
413 	 */
414 	switch (cmd) {
415 	case IPC_STAT:
416 		if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds)))
417 			return (set_errno(EFAULT));
418 		break;
419 
420 	case IPC_STAT64:
421 		if (copyout(&ds64, arg, sizeof (struct msqid_ds64)))
422 			return (set_errno(EFAULT));
423 		break;
424 	}
425 
426 	return (0);
427 }
428 
429 /*
430  * Remove all message queues associated with a given zone.  Called by
431  * zone_shutdown when the zone is halted.
432  */
433 /*ARGSUSED1*/
434 static void
435 msg_remove_zone(zoneid_t zoneid, void *arg)
436 {
437 	ipc_remove_zone(msq_svc, zoneid);
438 }
439 
/*
 * msgget system call.
 *
 *   key    - IPC key, passed through to ipc_get() and ipc_commit_begin()
 *   msgflg - flags, passed through to ipc_get() and ipc_commit_begin()
 *
 * Obtains a queue from ipc_get().  If the entry handed back is still
 * marked IPC_FREE it is initialised here and committed; in either case
 * the queue's id is returned.  On failure the error from ipc_get() or
 * ipc_commit_begin() is posted with set_errno().
 */
static int
msgget(key_t key, int msgflg)
{
	kmsqid_t	*qp;
	kmutex_t	*lock;
	int		id, error;
	int		ii;
	proc_t		*pp = curproc;

top:
	if (error = ipc_get(msq_svc, key, msgflg, (kipc_perm_t **)&qp, &lock))
		return (set_errno(error));

	if (IPC_FREE(&qp->msg_perm)) {
		/*
		 * Both the id lock and curproc's p_lock are dropped here,
		 * so ipc_get() evidently returns a free entry with both
		 * held.  NOTE(review): confirm against os/ipc.c.
		 */
		mutex_exit(lock);
		mutex_exit(&pp->p_lock);

		/* Set up the queue state with no locks held. */
		list_create(&qp->msg_list, sizeof (struct msg),
		    offsetof(struct msg, msg_node));
		qp->msg_qnum = 0;
		qp->msg_lspid = qp->msg_lrpid = 0;
		qp->msg_stime = qp->msg_rtime = 0;
		qp->msg_ctime = gethrestime_sec();
		for (ii = 0; ii < MAX_QNUM_CV; ii++)
			qp->msg_rcv_cnt[ii] = 0;
		qp->msg_snd_cnt = 0;

		if (error = ipc_commit_begin(msq_svc, key, msgflg,
		    (kipc_perm_t *)qp)) {
			/* EAGAIN means start over from the lookup. */
			if (error == EAGAIN)
				goto top;
			return (set_errno(error));
		}
		/*
		 * The per-queue byte and message limits are taken from the
		 * creating process's resource controls at creation time.
		 */
		qp->msg_qbytes = rctl_enforced_value(rc_process_msgmnb,
		    pp->p_rctls, pp);
		qp->msg_qmax = rctl_enforced_value(rc_process_msgtql,
		    pp->p_rctls, pp);
		/* ipc_commit_end() supplies the lock released below. */
		lock = ipc_commit_end(msq_svc, &qp->msg_perm);
	}
#ifdef C2_AUDIT
	if (audit_active)
		audit_ipcget(AT_IPC_MSG, (void *)qp);
#endif
	/* Read the id while the lock is still held. */
	id = qp->msg_perm.ipc_id;
	mutex_exit(lock);
	return (id);
}
490 
/*
 * msgrcv system call.
 *
 *   msqid  - id of the queue to receive from
 *   msgp   - user buffer: the message type followed by the text (mtext)
 *   msgsz  - capacity of the user's text area, in bytes
 *   msgtyp - selects the message:
 *              0   the first message on the queue
 *              > 0 the first message whose type equals msgtyp
 *              < 0 the message with the lowest type that is <= -msgtyp
 *                  (the earliest queued among equals)
 *   msgflg - MSG_NOERROR truncates an over-long message instead of
 *            failing; IPC_NOWAIT fails instead of waiting when no
 *            message matches
 *
 * Returns the number of text bytes transferred.  On failure the error
 * is posted with set_errno(): EINVAL (id not found), E2BIG (message
 * larger than msgsz without MSG_NOERROR), ENOMSG (nothing matches and
 * IPC_NOWAIT), EIDRM (queue removed while the lock was dropped or while
 * waiting), EINTR (wait interrupted), EFAULT (copyout failed), or the
 * error from ipcperm_access().
 *
 * A hold is kept on the queue from lookup until msgrcv_out so that qp
 * remains valid while the lock is dropped.
 */
static ssize_t
msgrcv(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, long msgtyp, int msgflg)
{
	struct msg	*mp;	/* ptr to msg on q */
	struct msg	*smp;	/* ptr to best msg on q */
	kmsqid_t	*qp;	/* ptr to associated q */
	kmutex_t	*lock;
	size_t		xtsz;	/* transfer byte count */
	int		error = 0, copyerror = 0;
	int		cvres;
	STRUCT_HANDLE(ipcmsgbuf, umsgp);
	model_t		mdl = get_udatamodel();

	CPU_STATS_ADDQ(CPU, sys, msg, 1);	/* bump msg send/rcv count */
	STRUCT_SET_HANDLE(umsgp, mdl, msgp);

	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL)
		return ((ssize_t)set_errno(EINVAL));
	ipc_hold(msq_svc, (kipc_perm_t *)qp);

	if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED()))
		goto msgrcv_out;

findmsg:
	/* Select the message to receive according to msgtyp. */
	smp = NULL;
	mp = list_head(&qp->msg_list);
	if (msgtyp == 0) {
		smp = mp;
	} else {
		for (; mp; mp = list_next(&qp->msg_list, mp)) {
			if (msgtyp > 0) {
				/* Exact match: take the first one found. */
				if (msgtyp != mp->msg_type)
					continue;
				smp = mp;
				break;
			}
			/*
			 * msgtyp < 0: track the lowest qualifying type.
			 * Ties keep the earlier message.
			 */
			if (mp->msg_type <= -msgtyp) {
				if (smp && smp->msg_type <= mp->msg_type)
					continue;
				smp = mp;
			}
		}
	}

	if (smp) {
		/*
		 * Message found.
		 */
		if ((smp->msg_flags & MSG_RCVCOPY) == 0) {
			/*
			 * No one else is copying this message. Copy it.
			 */
			if (msgsz < smp->msg_size) {
				if ((msgflg & MSG_NOERROR) == 0) {
					error = E2BIG;
					goto msgrcv_out;
				} else {
					xtsz = msgsz;
				}
			} else {
				xtsz = smp->msg_size;
			}

			/*
			 * Mark message as being copied out. Release mutex
			 * while copying out.
			 */
			ASSERT((smp->msg_flags & MSG_RCVCOPY) == 0);
			smp->msg_flags |= MSG_RCVCOPY;
			msg_hold(smp);
			mutex_exit(lock);

			if (mdl == DATAMODEL_NATIVE) {
				copyerror = copyout(&smp->msg_type, msgp,
				    sizeof (smp->msg_type));
			} else {
				/*
				 * 32-bit callers need an imploded msg type.
				 */
				int32_t	msg_type32 = smp->msg_type;

				copyerror = copyout(&msg_type32, msgp,
				    sizeof (msg_type32));
			}

			/* A zero-length transfer has no text to copy. */
			if (copyerror == 0 && xtsz)
				copyerror = copyout(smp->msg_addr,
				    STRUCT_FADDR(umsgp, mtext), xtsz);

			/*
			 * Reclaim mutex, make sure queue still exists,
			 * and remove message.
			 */
			lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
			ASSERT(smp->msg_flags & MSG_RCVCOPY);
			smp->msg_flags &= ~MSG_RCVCOPY;
			msg_rele(smp);

			/*
			 * smp is not touched again if the queue has gone:
			 * the msg_rele() above may have been the last
			 * reference.
			 */
			if (IPC_FREE(&qp->msg_perm)) {
				error = EIDRM;
				goto msgrcv_out;
			}
			/*
			 * MSG_RCVCOPY was set while we dropped and reacquired
			 * the lock. A thread looking for same message type
			 * might have entered during that interval and seeing
			 * MSG_RCVCOPY set, would have landed up in the sleepq.
			 */
			cv_broadcast(&qp->msg_rcv_cv[MSG_QNUM(smp->msg_type)]);
			cv_broadcast(&qp->msg_rcv_cv[0]);

			/*
			 * On a failed copyout the message stays on the
			 * queue; it is only unlinked after a good copy.
			 */
			if (copyerror) {
				error = EFAULT;
				goto msgrcv_out;
			}
			qp->msg_lrpid = ttoproc(curthread)->p_pid;
			qp->msg_rtime = gethrestime_sec();
			msgunlink(qp, smp);
			goto msgrcv_out;
		}
		/*
		 * Another thread is copying this message out.  Fall through
		 * and wait; note that IPC_NOWAIT is not consulted here.
		 */

	} else {
		/*
		 * No message found.
		 */
		if (msgflg & IPC_NOWAIT) {
			error = ENOMSG;
			goto msgrcv_out;
		}
	}

	/* Wait for new message */
	qp->msg_rcv_cnt[MSG_QNUM(msgtyp)]++;
	cvres = cv_wait_sig(&qp->msg_rcv_cv[MSG_QNUM(msgtyp)], lock);
	lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, lock);
	qp->msg_rcv_cnt[MSG_QNUM(msgtyp)]--;

	/* Queue removal is reported in preference to an interrupted wait. */
	if (IPC_FREE(&qp->msg_perm)) {
		error = EIDRM;
		goto msgrcv_out;
	}
	if (cvres == 0) {
		error = EINTR;
		goto msgrcv_out;
	}

	goto findmsg;

msgrcv_out:
	/*
	 * No explicit mutex_exit() follows, so ipc_rele() is relied upon
	 * to release the queue lock as well as the hold.
	 */
	ipc_rele(msq_svc, (kipc_perm_t *)qp);
	if (error)
		return ((ssize_t)set_errno(error));
	/* xtsz was assigned on every path that reaches here without error. */
	return ((ssize_t)xtsz);
}
648 
649 /*
650  * msgids system call.
651  */
652 static int
653 msgids(int *buf, uint_t nids, uint_t *pnids)
654 {
655 	int error;
656 
657 	if (error = ipc_ids(msq_svc, buf, nids, pnids))
658 		return (set_errno(error));
659 
660 	return (0);
661 }
662 
/*
 * Round a message length up to the alignment used between snapshot
 * entries: sizeof (size_t) for native callers, sizeof (size32_t) for
 * 32-bit callers.
 */
#define	RND(x)		roundup((x), sizeof (size_t))
#define	RND32(x)	roundup((x), sizeof (size32_t))

/*
 * msgsnap system call.
 *
 *   msqid  - id of the queue to snapshot
 *   buf    - user buffer that receives the snapshot
 *   bufsz  - size of buf in bytes; must hold at least a msgsnap_head
 *   msgtyp - which messages to include: 0 for all, > 0 for that exact
 *            type, < 0 for every type <= -msgtyp
 *
 * The user buffer receives a msgsnap_head (total size required and the
 * number of messages), followed for each selected message by a
 * msgsnap_mhead (length and type) and the message data padded with
 * RND()/RND32().  Messages are copied, not removed from the queue.
 *
 * If the required size exceeds bufsz, only the header is written, with
 * msgsnap_nmsg set to 0 and msgsnap_size set to the size required.
 *
 * Returns 0 on success.  On failure the error is posted with
 * set_errno(): EINVAL (bufsz too small for the header, or id not
 * found), EFAULT (a copyout failed), EIDRM (queue removed while
 * copying), or the error from ipcperm_access().
 */
static int
msgsnap(int msqid, caddr_t buf, size_t bufsz, long msgtyp)
{
	struct msg	*mp;	/* ptr to msg on q */
	kmsqid_t	*qp;	/* ptr to associated q */
	kmutex_t	*lock;
	size_t		size;
	size_t		nmsg;
	struct msg	**snaplist;
	int		error, i;
	model_t		mdl = get_udatamodel();
	STRUCT_DECL(msgsnap_head, head);
	STRUCT_DECL(msgsnap_mhead, mhead);

	STRUCT_INIT(head, mdl);
	STRUCT_INIT(mhead, mdl);

	if (bufsz < STRUCT_SIZE(head))
		return (set_errno(EINVAL));

	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL)
		return (set_errno(EINVAL));

	/* error is 0 from here on unless a later step sets it. */
	if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED())) {
		mutex_exit(lock);
		return (set_errno(error));
	}
	ipc_hold(msq_svc, (kipc_perm_t *)qp);

	/*
	 * First compute the required buffer size and
	 * the number of messages on the queue.
	 */
	size = nmsg = 0;
	for (mp = list_head(&qp->msg_list); mp;
	    mp = list_next(&qp->msg_list, mp)) {
		if (msgtyp == 0 ||
		    (msgtyp > 0 && msgtyp == mp->msg_type) ||
		    (msgtyp < 0 && mp->msg_type <= -msgtyp)) {
			nmsg++;
			if (mdl == DATAMODEL_NATIVE)
				size += RND(mp->msg_size);
			else
				size += RND32(mp->msg_size);
		}
	}

	/* Too small: report the size needed but copy no messages. */
	size += STRUCT_SIZE(head) + nmsg * STRUCT_SIZE(mhead);
	if (size > bufsz)
		nmsg = 0;

	if (nmsg > 0) {
		/*
		 * Mark the messages as being copied.
		 */
		snaplist = (struct msg **)kmem_alloc(nmsg *
		    sizeof (struct msg *), KM_SLEEP);
		i = 0;
		/*
		 * Same selection test as the counting pass above; the hold
		 * keeps each message alive once the lock is dropped.
		 */
		for (mp = list_head(&qp->msg_list); mp;
		    mp = list_next(&qp->msg_list, mp)) {
			if (msgtyp == 0 ||
			    (msgtyp > 0 && msgtyp == mp->msg_type) ||
			    (msgtyp < 0 && mp->msg_type <= -msgtyp)) {
				msg_hold(mp);
				snaplist[i] = mp;
				i++;
			}
		}
	}
	mutex_exit(lock);

	/*
	 * Copy out the buffer header.
	 */
	STRUCT_FSET(head, msgsnap_size, size);
	STRUCT_FSET(head, msgsnap_nmsg, nmsg);
	if (copyout(STRUCT_BUF(head), buf, STRUCT_SIZE(head)))
		error = EFAULT;

	buf += STRUCT_SIZE(head);

	/*
	 * Now copy out the messages one by one.
	 */
	for (i = 0; i < nmsg; i++) {
		mp = snaplist[i];
		/*
		 * After the first error nothing more is copied, but every
		 * held message must still be released below.
		 */
		if (error == 0) {
			STRUCT_FSET(mhead, msgsnap_mlen, mp->msg_size);
			STRUCT_FSET(mhead, msgsnap_mtype, mp->msg_type);
			if (copyout(STRUCT_BUF(mhead), buf, STRUCT_SIZE(mhead)))
				error = EFAULT;
			buf += STRUCT_SIZE(mhead);

			if (error == 0 &&
			    mp->msg_size != 0 &&
			    copyout(mp->msg_addr, buf, mp->msg_size))
				error = EFAULT;
			if (mdl == DATAMODEL_NATIVE)
				buf += RND(mp->msg_size);
			else
				buf += RND32(mp->msg_size);
		}
		/* The hold is dropped with the queue lock held. */
		lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
		msg_rele(mp);
		/* Check for msg q deleted or reallocated */
		if (IPC_FREE(&qp->msg_perm))
			error = EIDRM;
		mutex_exit(lock);
	}

	/*
	 * Retake the lock for ipc_rele(); no mutex_exit() follows, so
	 * ipc_rele() is relied upon to release it.
	 */
	(void) ipc_lock(msq_svc, qp->msg_perm.ipc_id);
	ipc_rele(msq_svc, (kipc_perm_t *)qp);

	/* snaplist was only allocated when there were messages to copy. */
	if (nmsg > 0)
		kmem_free(snaplist, nmsg * sizeof (struct msg *));

	if (error)
		return (set_errno(error));
	return (0);
}
789 
/*
 * Largest message, in bytes, that msgsnd() allocates and copies in
 * before it has looked up the queue.
 */
#define	MSG_PREALLOC_LIMIT 8192

/*
 * msgsnd system call.
 *
 *   msqid  - id of the queue to send to
 *   msgp   - user buffer: the message type followed by the text (mtext)
 *   msgsz  - length of the text in bytes
 *   msgflg - IPC_NOWAIT fails instead of waiting when the queue is full
 *
 * Returns 0 on success.  On failure the error is posted with
 * set_errno(): EFAULT (a copyin failed), EINVAL (type < 1, id not
 * found, or msgsz larger than the queue's msg_qbytes), EAGAIN (queue
 * full and IPC_NOWAIT), EIDRM (queue removed while waiting or while the
 * lock was dropped), EINTR (wait interrupted), or the error from
 * ipcperm_access().
 *
 * Messages up to MSG_PREALLOC_LIMIT bytes are allocated and copied in
 * before the queue is looked up.  Larger ones are allocated only once
 * space is available, which requires dropping the queue lock and then
 * re-checking for space.
 */
static int
msgsnd(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, int msgflg)
{
	kmsqid_t	*qp;
	kmutex_t	*lock = NULL;
	struct msg	*mp = NULL;
	long		type;
	int		error = 0;
	model_t		mdl = get_udatamodel();
	STRUCT_HANDLE(ipcmsgbuf, umsgp);

	CPU_STATS_ADDQ(CPU, sys, msg, 1);	/* bump msg send/rcv count */
	STRUCT_SET_HANDLE(umsgp, mdl, msgp);

	/* The type leads the user buffer; 32-bit callers supply an int32_t. */
	if (mdl == DATAMODEL_NATIVE) {
		if (copyin(msgp, &type, sizeof (type)))
			return (set_errno(EFAULT));
	} else {
		int32_t	type32;
		if (copyin(msgp, &type32, sizeof (type32)))
			return (set_errno(EFAULT));
		type = type32;
	}

	if (type < 1)
		return (set_errno(EINVAL));

	/*
	 * We want the value here large enough that most of the
	 * message operations will use the "lockless" path,
	 * but small enough that a user can not reserve large
	 * chunks of kernel memory unless they have a valid
	 * reason to.
	 */
	if (msgsz <= MSG_PREALLOC_LIMIT) {
		/*
		 * We are small enough that we can afford to do the
		 * allocation now.  This saves dropping the lock
		 * and then reacquiring the lock.
		 */
		mp = kmem_zalloc(sizeof (struct msg), KM_SLEEP);
		mp->msg_copycnt = 1;
		mp->msg_size = msgsz;
		if (msgsz) {
			mp->msg_addr = kmem_alloc(msgsz, KM_SLEEP);
			if (copyin(STRUCT_FADDR(umsgp, mtext),
			    mp->msg_addr, msgsz) == -1) {
				error = EFAULT;
				goto msgsnd_out;
			}
		}
	}

	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL) {
		error = EINVAL;
		goto msgsnd_out;
	}

	ipc_hold(msq_svc, (kipc_perm_t *)qp);

	/* A message larger than the whole queue limit can never fit. */
	if (msgsz > qp->msg_qbytes) {
		error = EINVAL;
		goto msgsnd_out;
	}

	if (error = ipcperm_access(&qp->msg_perm, MSG_W, CRED()))
		goto msgsnd_out;

top:
	/*
	 * Allocate space on q, message header, & buffer space.
	 */
	ASSERT(qp->msg_qnum <= qp->msg_qmax);
	/* Wait until there is room for both the bytes and one more message. */
	while ((msgsz > qp->msg_qbytes - qp->msg_cbytes) ||
	    (qp->msg_qnum == qp->msg_qmax)) {
		int cvres;

		if (msgflg & IPC_NOWAIT) {
			error = EAGAIN;
			goto msgsnd_out;
		}

		qp->msg_snd_cnt++;
		cvres = cv_wait_sig(&qp->msg_snd_cv, lock);
		lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, lock);
		qp->msg_snd_cnt--;

		/* Queue removal is reported before an interrupted wait. */
		if (IPC_FREE(&qp->msg_perm)) {
			error = EIDRM;
			goto msgsnd_out;
		}

		if (cvres == 0) {
			error = EINTR;
			goto msgsnd_out;
		}
	}

	if (mp == NULL) {
		int failure;

		/*
		 * Large message: allocate and copy in now, with the lock
		 * dropped.  msgsz exceeds MSG_PREALLOC_LIMIT on this path,
		 * hence the assertion.
		 */
		mutex_exit(lock);
		ASSERT(msgsz > 0);
		mp = kmem_zalloc(sizeof (struct msg), KM_SLEEP);
		mp->msg_addr = kmem_alloc(msgsz, KM_SLEEP);
		mp->msg_size = msgsz;
		mp->msg_copycnt = 1;

		failure = (copyin(STRUCT_FADDR(umsgp, mtext),
		    mp->msg_addr, msgsz) == -1);
		lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
		if (IPC_FREE(&qp->msg_perm)) {
			error = EIDRM;
			goto msgsnd_out;
		}
		if (failure) {
			error = EFAULT;
			goto msgsnd_out;
		}
		/*
		 * The queue may have filled while the lock was dropped, so
		 * the space check is repeated; mp is now non-NULL, so this
		 * branch is not taken again.
		 */
		goto top;
	}

	/*
	 * Everything is available, put msg on q.
	 */
	qp->msg_qnum++;
	qp->msg_cbytes += msgsz;
	qp->msg_lspid = curproc->p_pid;
	qp->msg_stime = gethrestime_sec();
	mp->msg_type = type;
	mp->msg_flags = 0;
	list_insert_tail(&qp->msg_list, mp);
	/*
	 * For all message type >= 1.
	 */
	if (qp->msg_rcv_cnt[MSG_QNUM(type)])
		cv_broadcast(&qp->msg_rcv_cv[MSG_QNUM(type)]);
	/*
	 * For all message type < 1.
	 */
	if (qp->msg_rcv_cnt[0])
		cv_broadcast(&qp->msg_rcv_cv[0]);

msgsnd_out:
	/* lock is still NULL when we got here before a successful lookup. */
	if (lock)
		ipc_rele(msq_svc, (kipc_perm_t *)qp);	/* drops lock */

	if (error) {
		/* The message was never queued; drop our only reference. */
		if (mp)
			msg_rele(mp);
		return (set_errno(error));
	}

	return (0);
}
950 
951 /*
952  * msgsys - System entry point for msgctl, msgget, msgrcv, and msgsnd
953  * system calls.
954  */
955 static ssize_t
956 msgsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3,
957 	uintptr_t a4, uintptr_t a5)
958 {
959 	ssize_t error;
960 
961 	switch (opcode) {
962 	case MSGGET:
963 		error = msgget((key_t)a1, (int)a2);
964 		break;
965 	case MSGCTL:
966 		error = msgctl((int)a1, (int)a2, (void *)a3);
967 		break;
968 	case MSGRCV:
969 		error = msgrcv((int)a1, (struct ipcmsgbuf *)a2,
970 		    (size_t)a3, (long)a4, (int)a5);
971 		break;
972 	case MSGSND:
973 		error = msgsnd((int)a1, (struct ipcmsgbuf *)a2,
974 		    (size_t)a3, (int)a4);
975 		break;
976 	case MSGIDS:
977 		error = msgids((int *)a1, (uint_t)a2, (uint_t *)a3);
978 		break;
979 	case MSGSNAP:
980 		error = msgsnap((int)a1, (caddr_t)a2, (size_t)a3, (long)a4);
981 		break;
982 	default:
983 		error = set_errno(EINVAL);
984 		break;
985 	}
986 
987 	return (error);
988 }
989 
#ifdef	_SYSCALL32_IMPL
/*
 * msgsys32 - System entry point for the msgget, msgctl, msgrcv, msgsnd,
 * msgids and msgsnap system calls for 32-bit callers on LP64 kernel.
 *
 * Same dispatch as msgsys(), except that the 32-bit arguments are
 * widened here: pointers go through uintptr_t, and the msgtyp arguments
 * and the msgsnd size go through int32_t first so that they are
 * sign-extended.  An unknown opcode fails with EINVAL.
 */
static ssize32_t
msgsys32(int opcode, uint32_t a1, uint32_t a2, uint32_t a3,
	uint32_t a4, uint32_t a5)
{
	ssize_t rv;

	if (opcode == MSGGET) {
		rv = msgget((key_t)a1, (int)a2);
	} else if (opcode == MSGCTL) {
		rv = msgctl((int)a1, (int)a2, (void *)(uintptr_t)a3);
	} else if (opcode == MSGRCV) {
		rv = msgrcv((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2,
		    (size_t)a3, (long)(int32_t)a4, (int)a5);
	} else if (opcode == MSGSND) {
		rv = msgsnd((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2,
		    (size_t)(int32_t)a3, (int)a4);
	} else if (opcode == MSGIDS) {
		rv = msgids((int *)(uintptr_t)a1, (uint_t)a2,
		    (uint_t *)(uintptr_t)a3);
	} else if (opcode == MSGSNAP) {
		rv = msgsnap((int)a1, (caddr_t)(uintptr_t)a2, (size_t)a3,
		    (long)(int32_t)a4);
	} else {
		rv = set_errno(EINVAL);
	}

	return (rv);
}
#endif	/* _SYSCALL32_IMPL */
1031 #endif	/* SYSCALL32_IMPL */
1032