xref: /titanic_51/usr/src/uts/common/os/msg.c (revision 2c5b6df145c068c61f714a0ccd0f4a3e64037fb5)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5b2eb1770Sudpa  * Common Development and Distribution License (the "License").
6b2eb1770Sudpa  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*2c5b6df1Sdv142724  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
277c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate 
307c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
317c478bd9Sstevel@tonic-gate 
327c478bd9Sstevel@tonic-gate /*
337c478bd9Sstevel@tonic-gate  * Inter-Process Communication Message Facility.
347c478bd9Sstevel@tonic-gate  *
357c478bd9Sstevel@tonic-gate  * See os/ipc.c for a description of common IPC functionality.
367c478bd9Sstevel@tonic-gate  *
377c478bd9Sstevel@tonic-gate  * Resource controls
387c478bd9Sstevel@tonic-gate  * -----------------
397c478bd9Sstevel@tonic-gate  *
40824c205fSml93401  * Control:      zone.max-msg-ids (rc_zone_msgmni)
41824c205fSml93401  * Description:  Maximum number of message queue ids allowed a zone.
42824c205fSml93401  *
43824c205fSml93401  *   When msgget() is used to allocate a message queue, one id is
44824c205fSml93401  *   allocated.  If the id allocation doesn't succeed, msgget() fails
45824c205fSml93401  *   and errno is set to ENOSPC.  Upon successful msgctl(, IPC_RMID)
46824c205fSml93401  *   the id is deallocated.
47824c205fSml93401  *
487c478bd9Sstevel@tonic-gate  * Control:      project.max-msg-ids (rc_project_msgmni)
497c478bd9Sstevel@tonic-gate  * Description:  Maximum number of message queue ids allowed a project.
507c478bd9Sstevel@tonic-gate  *
517c478bd9Sstevel@tonic-gate  *   When msgget() is used to allocate a message queue, one id is
527c478bd9Sstevel@tonic-gate  *   allocated.  If the id allocation doesn't succeed, msgget() fails
537c478bd9Sstevel@tonic-gate  *   and errno is set to ENOSPC.  Upon successful msgctl(, IPC_RMID)
547c478bd9Sstevel@tonic-gate  *   the id is deallocated.
557c478bd9Sstevel@tonic-gate  *
567c478bd9Sstevel@tonic-gate  * Control:      process.max-msg-qbytes (rc_process_msgmnb)
577c478bd9Sstevel@tonic-gate  * Description:  Maximum number of bytes of messages on a message queue.
587c478bd9Sstevel@tonic-gate  *
597c478bd9Sstevel@tonic-gate  *   When msgget() successfully allocates a message queue, the minimum
607c478bd9Sstevel@tonic-gate  *   enforced value of this limit is used to initialize msg_qbytes.
617c478bd9Sstevel@tonic-gate  *
627c478bd9Sstevel@tonic-gate  * Control:      process.max-msg-messages (rc_process_msgtql)
637c478bd9Sstevel@tonic-gate  * Description:  Maximum number of messages on a message queue.
647c478bd9Sstevel@tonic-gate  *
657c478bd9Sstevel@tonic-gate  *   When msgget() successfully allocates a message queue, the minimum
667c478bd9Sstevel@tonic-gate  *   enforced value of this limit is used to initialize a per-queue
677c478bd9Sstevel@tonic-gate  *   limit on the number of messages.
687c478bd9Sstevel@tonic-gate  */
697c478bd9Sstevel@tonic-gate 
707c478bd9Sstevel@tonic-gate #include <sys/types.h>
717c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
727c478bd9Sstevel@tonic-gate #include <sys/param.h>
737c478bd9Sstevel@tonic-gate #include <sys/cred.h>
747c478bd9Sstevel@tonic-gate #include <sys/user.h>
757c478bd9Sstevel@tonic-gate #include <sys/proc.h>
767c478bd9Sstevel@tonic-gate #include <sys/time.h>
777c478bd9Sstevel@tonic-gate #include <sys/ipc.h>
787c478bd9Sstevel@tonic-gate #include <sys/ipc_impl.h>
797c478bd9Sstevel@tonic-gate #include <sys/msg.h>
807c478bd9Sstevel@tonic-gate #include <sys/msg_impl.h>
817c478bd9Sstevel@tonic-gate #include <sys/list.h>
827c478bd9Sstevel@tonic-gate #include <sys/systm.h>
837c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
847c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
857c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
867c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
877c478bd9Sstevel@tonic-gate #include <sys/errno.h>
887c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
897c478bd9Sstevel@tonic-gate #include <sys/debug.h>
907c478bd9Sstevel@tonic-gate #include <sys/project.h>
917c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
927c478bd9Sstevel@tonic-gate #include <sys/syscall.h>
937c478bd9Sstevel@tonic-gate #include <sys/policy.h>
947c478bd9Sstevel@tonic-gate #include <sys/zone.h>
957c478bd9Sstevel@tonic-gate 
967c478bd9Sstevel@tonic-gate #include <c2/audit.h>
977c478bd9Sstevel@tonic-gate 
987c478bd9Sstevel@tonic-gate /*
997c478bd9Sstevel@tonic-gate  * The following tunables are obsolete.  Though for compatibility we
1007c478bd9Sstevel@tonic-gate  * still read and interpret msginfo_msgmnb, msginfo_msgmni, and
1017c478bd9Sstevel@tonic-gate  * msginfo_msgtql (see os/project.c and os/rctl_proc.c), the preferred
1027c478bd9Sstevel@tonic-gate  * mechanism for administrating the IPC Message facility is through the
1037c478bd9Sstevel@tonic-gate  * resource controls described at the top of this file.
1047c478bd9Sstevel@tonic-gate  */
1057c478bd9Sstevel@tonic-gate size_t	msginfo_msgmax = 2048;	/* (obsolete) */
1067c478bd9Sstevel@tonic-gate size_t	msginfo_msgmnb = 4096;	/* (obsolete) */
1077c478bd9Sstevel@tonic-gate int	msginfo_msgmni = 50;	/* (obsolete) */
1087c478bd9Sstevel@tonic-gate int	msginfo_msgtql = 40;	/* (obsolete) */
1097c478bd9Sstevel@tonic-gate int	msginfo_msgssz = 8;	/* (obsolete) */
1107c478bd9Sstevel@tonic-gate int	msginfo_msgmap = 0;	/* (obsolete) */
1117c478bd9Sstevel@tonic-gate ushort_t msginfo_msgseg = 1024;	/* (obsolete) */
1127c478bd9Sstevel@tonic-gate 
113824c205fSml93401 extern rctl_hndl_t rc_zone_msgmni;
1147c478bd9Sstevel@tonic-gate extern rctl_hndl_t rc_project_msgmni;
1157c478bd9Sstevel@tonic-gate extern rctl_hndl_t rc_process_msgmnb;
1167c478bd9Sstevel@tonic-gate extern rctl_hndl_t rc_process_msgtql;
1177c478bd9Sstevel@tonic-gate static ipc_service_t *msq_svc;
1187c478bd9Sstevel@tonic-gate static zone_key_t msg_zone_key;
1197c478bd9Sstevel@tonic-gate 
1207c478bd9Sstevel@tonic-gate static void msg_dtor(kipc_perm_t *);
1217c478bd9Sstevel@tonic-gate static void msg_rmid(kipc_perm_t *);
1227c478bd9Sstevel@tonic-gate static void msg_remove_zone(zoneid_t, void *);
1237c478bd9Sstevel@tonic-gate 
1247c478bd9Sstevel@tonic-gate /*
1257c478bd9Sstevel@tonic-gate  * Module linkage information for the kernel.
1267c478bd9Sstevel@tonic-gate  */
1277c478bd9Sstevel@tonic-gate static ssize_t msgsys(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2,
1287c478bd9Sstevel@tonic-gate 	uintptr_t a4, uintptr_t a5);
1297c478bd9Sstevel@tonic-gate 
1307c478bd9Sstevel@tonic-gate static struct sysent ipcmsg_sysent = {
1317c478bd9Sstevel@tonic-gate 	6,
1327c478bd9Sstevel@tonic-gate #ifdef	_LP64
1337c478bd9Sstevel@tonic-gate 	SE_ARGC | SE_NOUNLOAD | SE_64RVAL,
1347c478bd9Sstevel@tonic-gate #else
1357c478bd9Sstevel@tonic-gate 	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
1367c478bd9Sstevel@tonic-gate #endif
1377c478bd9Sstevel@tonic-gate 	(int (*)())msgsys
1387c478bd9Sstevel@tonic-gate };
1397c478bd9Sstevel@tonic-gate 
#if defined(_SYSCALL32_IMPL)
static ssize32_t msgsys32(int opcode,
    uint32_t a0, uint32_t a1, uint32_t a2, uint32_t a4, uint32_t a5);

/*
 * System call entry used for 32-bit callers; only built when the kernel
 * provides a separate 32-bit system call implementation.
 */
static struct sysent ipcmsg_sysent32 = {
	6,
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
	(int (*)())msgsys32
};
#endif	/* _SYSCALL32_IMPL */
1507c478bd9Sstevel@tonic-gate 
1517c478bd9Sstevel@tonic-gate static struct modlsys modlsys = {
1527c478bd9Sstevel@tonic-gate 	&mod_syscallops, "System V message facility", &ipcmsg_sysent
1537c478bd9Sstevel@tonic-gate };
1547c478bd9Sstevel@tonic-gate 
1557c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
1567c478bd9Sstevel@tonic-gate static struct modlsys modlsys32 = {
1577c478bd9Sstevel@tonic-gate 	&mod_syscallops32, "32-bit System V message facility", &ipcmsg_sysent32
1587c478bd9Sstevel@tonic-gate };
1597c478bd9Sstevel@tonic-gate #endif
1607c478bd9Sstevel@tonic-gate 
161*2c5b6df1Sdv142724 /*
162*2c5b6df1Sdv142724  *      Big Theory statement for message queue correctness
163*2c5b6df1Sdv142724  *
 * The msgrcv and msgsnd functions no longer use cv_broadcast to wake up
 * receivers who are waiting for an event.  Using the cv_broadcast method
 * resulted in negative scaling when the number of waiting receivers is large
 * (the thundering herd problem).  Instead, the receivers waiting to receive a
 * message are now linked in a queue-like fashion and awakened one at a time
 * in a controlled manner.
170*2c5b6df1Sdv142724  *
171*2c5b6df1Sdv142724  * Receivers can block on two different classes of waiting list:
172*2c5b6df1Sdv142724  *    1) "sendwait" list, which is the more complex list of the two.  The
173*2c5b6df1Sdv142724  *	  receiver will be awakened by a sender posting a new message.  There
174*2c5b6df1Sdv142724  *	  are two types of "sendwait" list used:
 *		a) msg_wait_snd: handles all receivers who are looking for
 *		   a message type >= 0, but were unable to locate a match.
177*2c5b6df1Sdv142724  *
178*2c5b6df1Sdv142724  *		   slot 0: reserved for receivers that have designated they
179*2c5b6df1Sdv142724  *			   will take any message type.
180*2c5b6df1Sdv142724  *		   rest:   consist of receivers requesting a specific type
181*2c5b6df1Sdv142724  *			   but the type was not present.  The entries are
182*2c5b6df1Sdv142724  *			   hashed into a bucket in an attempt to keep
183*2c5b6df1Sdv142724  *			   any list search relatively short.
184*2c5b6df1Sdv142724  * 		b) msg_wait_snd_ngt: handles all receivers that have designated
185*2c5b6df1Sdv142724  *		   a negative message type. Unlike msg_wait_snd, the hash bucket
186*2c5b6df1Sdv142724  *		   serves a range of negative message types (-1 to -5, -6 to -10
187*2c5b6df1Sdv142724  *		   and so forth), where the last bucket is reserved for all the
188*2c5b6df1Sdv142724  *		   negative message types that hash outside of MSG_MAX_QNUM - 1.
189*2c5b6df1Sdv142724  *		   This is done this way to simplify the operation of locating a
190*2c5b6df1Sdv142724  *		   negative message type.
191*2c5b6df1Sdv142724  *
192*2c5b6df1Sdv142724  *    2) "copyout" list, where the receiver is awakened by another
193*2c5b6df1Sdv142724  *	 receiver after a message is copied out.  This is a linked list
194*2c5b6df1Sdv142724  *	 of waiters that are awakened one at a time.  Although the solution is
195*2c5b6df1Sdv142724  *	 not optimal, the complexity that would be added in for waking
196*2c5b6df1Sdv142724  *	 up the right entry far exceeds any potential pay back (too many
197*2c5b6df1Sdv142724  *	 correctness and corner case issues).
198*2c5b6df1Sdv142724  *
199*2c5b6df1Sdv142724  * The lists are doubly linked.  In the case of the "sendwait"
200*2c5b6df1Sdv142724  * list, this allows the thread to remove itself from the list without having
201*2c5b6df1Sdv142724  * to traverse the list.  In the case of the "copyout" list it simply allows
202*2c5b6df1Sdv142724  * us to use common functions with the "sendwait" list.
203*2c5b6df1Sdv142724  *
204*2c5b6df1Sdv142724  * To make sure receivers are not hung out to dry, we must guarantee:
205*2c5b6df1Sdv142724  *    1. If any queued message matches any receiver, then at least one
206*2c5b6df1Sdv142724  *       matching receiver must be processing the request.
 *    2. Blocking on the copyout queue is only temporary while messages
 *	 are being copied out.  The process is guaranteed to wake up
 *	 when it gets to the front of the queue (copyout is a FIFO).
210*2c5b6df1Sdv142724  *
211*2c5b6df1Sdv142724  * Rules for blocking and waking up:
212*2c5b6df1Sdv142724  *   1. A receiver entering msgrcv must examine all messages for a match
213*2c5b6df1Sdv142724  *      before blocking on a sendwait queue.
 *   2. If the receiver blocks because the message it chose is already
 *	being copied out, then when it wakes up it needs to start
 *	checking the messages from the beginning.
 *   3) Whenever a process returns from msgrcv for any reason, if it
 *	had attempted to copy a message or blocked waiting for a copy
 *	to complete, it needs to wake up the next receiver blocked on
 *	a copy out.
 *   4) When a message is sent, the sender selects a process waiting
 *	for that type of message.  This selection process rotates between
 *	receiver types of 0, negative and positive to prevent starvation of
 *	any one particular receiver type.
225*2c5b6df1Sdv142724  *   5) The following are the scenarios for processes that are awakened
226*2c5b6df1Sdv142724  *	by a msgsnd:
227*2c5b6df1Sdv142724  *		a) The process finds the message and is able to copy
228*2c5b6df1Sdv142724  *		   it out.  Once complete, the process returns.
229*2c5b6df1Sdv142724  *		b) The message that was sent that triggered the wakeup is no
230*2c5b6df1Sdv142724  *		   longer available (another process found the message first).
231*2c5b6df1Sdv142724  *		   We issue a wakeup on copy queue and then go back to
232*2c5b6df1Sdv142724  *		   sleep waiting for another matching message to be sent.
233*2c5b6df1Sdv142724  *		c) The message that was supposed to be processed was
234*2c5b6df1Sdv142724  *		   already serviced by another process.  However a different
235*2c5b6df1Sdv142724  *		   message is present which we can service.  The message
236*2c5b6df1Sdv142724  *		   is copied and the process returns.
237*2c5b6df1Sdv142724  *		d) The message is found, but some sort of error occurs that
238*2c5b6df1Sdv142724  *		   prevents the message from being copied.  The receiver
239*2c5b6df1Sdv142724  *		   wakes up the next sender that can service this message
240*2c5b6df1Sdv142724  *		   type and returns an error to the caller.
241*2c5b6df1Sdv142724  *		e) The message is found, but it is marked as being copied
242*2c5b6df1Sdv142724  *		   out.  The receiver then goes to sleep on the copyout
243*2c5b6df1Sdv142724  *		   queue where it will be awakened again sometime in the future.
244*2c5b6df1Sdv142724  *
245*2c5b6df1Sdv142724  *
246*2c5b6df1Sdv142724  *   6) Whenever a message is found that matches the message type designated,
 *	but is being copied out we have to block on the copyout queue.
 *	After the process doing the copy finishes the copy out, it must wake
 *	up (either directly or indirectly) all receivers who blocked on its
 *	copyout,
250*2c5b6df1Sdv142724  *	so they are guaranteed a chance to examine the remaining messages.
251*2c5b6df1Sdv142724  *	This is implemented via a chain of wakeups: Y wakes X, who wakes Z,
252*2c5b6df1Sdv142724  *	and so on.  The chain cannot be broken.  This leads to the following
253*2c5b6df1Sdv142724  *	cases:
 *		a) When a receiver has finished copying the message (or
 *		   encountered an error), the first entry on the copyout
 *		   queue is woken up.
257*2c5b6df1Sdv142724  *		b) When the receiver is woken up, it attempts to locate
258*2c5b6df1Sdv142724  *		   a message type match.
259*2c5b6df1Sdv142724  *		c) If a message type is found and
260*2c5b6df1Sdv142724  *			-- MSG_RCVCOPY flag is not set, the message is
261*2c5b6df1Sdv142724  *			   marked for copying out.  Regardless of the copyout
262*2c5b6df1Sdv142724  *			   success the next entry on the copyout queue is
263*2c5b6df1Sdv142724  *			   awakened and the operation is completed.
264*2c5b6df1Sdv142724  *			-- MSG_RCVCOPY is set, we simply go back to sleep again
265*2c5b6df1Sdv142724  *			   on the copyout queue.
266*2c5b6df1Sdv142724  *		d) If the message type is not found then we wakeup the next
267*2c5b6df1Sdv142724  *		   process on the copyout queue.
268*2c5b6df1Sdv142724  */
269*2c5b6df1Sdv142724 
270*2c5b6df1Sdv142724 static ulong_t msg_type_hash(long);
271*2c5b6df1Sdv142724 static int msgq_check_err(kmsqid_t *qp, int cvres);
272*2c5b6df1Sdv142724 static int msg_rcvq_sleep(list_t *, msgq_wakeup_t *, kmutex_t **,
273*2c5b6df1Sdv142724     kmsqid_t *);
274*2c5b6df1Sdv142724 static int msg_copyout(kmsqid_t *, long, kmutex_t **, size_t *, size_t,
275*2c5b6df1Sdv142724     struct msg *, struct ipcmsgbuf *, int);
276*2c5b6df1Sdv142724 static void msg_rcvq_wakeup_all(list_t *);
277*2c5b6df1Sdv142724 static void msg_wakeup_rdr(kmsqid_t *, msg_select_t **, long);
278*2c5b6df1Sdv142724 static msgq_wakeup_t *msg_fnd_any_snd(kmsqid_t *, int, long);
279*2c5b6df1Sdv142724 static msgq_wakeup_t *msg_fnd_any_rdr(kmsqid_t *, int, long);
280*2c5b6df1Sdv142724 static msgq_wakeup_t *msg_fnd_neg_snd(kmsqid_t *, int, long);
281*2c5b6df1Sdv142724 static msgq_wakeup_t *msg_fnd_spc_snd(kmsqid_t *, int, long);
282*2c5b6df1Sdv142724 static struct msg *msgrcv_lookup(kmsqid_t *, long);
283*2c5b6df1Sdv142724 
284*2c5b6df1Sdv142724 msg_select_t msg_fnd_sndr[] = {
285*2c5b6df1Sdv142724 	{ msg_fnd_any_snd, &msg_fnd_sndr[1] },
286*2c5b6df1Sdv142724 	{ msg_fnd_spc_snd, &msg_fnd_sndr[2] },
287*2c5b6df1Sdv142724 	{ msg_fnd_neg_snd, &msg_fnd_sndr[0] }
288*2c5b6df1Sdv142724 };
289*2c5b6df1Sdv142724 
290*2c5b6df1Sdv142724 msg_select_t msg_fnd_rdr[1] = {
291*2c5b6df1Sdv142724 	{ msg_fnd_any_rdr, &msg_fnd_rdr[0] },
292*2c5b6df1Sdv142724 };
293*2c5b6df1Sdv142724 
2947c478bd9Sstevel@tonic-gate static struct modlinkage modlinkage = {
2957c478bd9Sstevel@tonic-gate 	MODREV_1,
2967c478bd9Sstevel@tonic-gate 	&modlsys,
2977c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
2987c478bd9Sstevel@tonic-gate 	&modlsys32,
2997c478bd9Sstevel@tonic-gate #endif
3007c478bd9Sstevel@tonic-gate 	NULL
3017c478bd9Sstevel@tonic-gate };
3027c478bd9Sstevel@tonic-gate 
3037c478bd9Sstevel@tonic-gate 
3047c478bd9Sstevel@tonic-gate int
3057c478bd9Sstevel@tonic-gate _init(void)
3067c478bd9Sstevel@tonic-gate {
3077c478bd9Sstevel@tonic-gate 	int result;
3087c478bd9Sstevel@tonic-gate 
309824c205fSml93401 	msq_svc = ipcs_create("msqids", rc_project_msgmni, rc_zone_msgmni,
310824c205fSml93401 	    sizeof (kmsqid_t), msg_dtor, msg_rmid, AT_IPC_MSG,
311824c205fSml93401 	    offsetof(ipc_rqty_t, ipcq_msgmni));
3127c478bd9Sstevel@tonic-gate 	zone_key_create(&msg_zone_key, NULL, msg_remove_zone, NULL);
3137c478bd9Sstevel@tonic-gate 
3147c478bd9Sstevel@tonic-gate 	if ((result = mod_install(&modlinkage)) == 0)
3157c478bd9Sstevel@tonic-gate 		return (0);
3167c478bd9Sstevel@tonic-gate 
3177c478bd9Sstevel@tonic-gate 	(void) zone_key_delete(msg_zone_key);
3187c478bd9Sstevel@tonic-gate 	ipcs_destroy(msq_svc);
3197c478bd9Sstevel@tonic-gate 
3207c478bd9Sstevel@tonic-gate 	return (result);
3217c478bd9Sstevel@tonic-gate }
3227c478bd9Sstevel@tonic-gate 
/*
 * Module unload entry point.  Always refuses by returning EBUSY; this
 * function performs no teardown.
 */
int
_fini(void)
{
	return (EBUSY);
}
3287c478bd9Sstevel@tonic-gate 
/*
 * Module information entry point.  Passes the caller's modinfo buffer and
 * this module's linkage to mod_info() and returns its result unchanged.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
3347c478bd9Sstevel@tonic-gate 
3357c478bd9Sstevel@tonic-gate static void
3367c478bd9Sstevel@tonic-gate msg_dtor(kipc_perm_t *perm)
3377c478bd9Sstevel@tonic-gate {
3387c478bd9Sstevel@tonic-gate 	kmsqid_t *qp = (kmsqid_t *)perm;
339b2eb1770Sudpa 	int		ii;
3407c478bd9Sstevel@tonic-gate 
341*2c5b6df1Sdv142724 	for (ii = 0; ii <= MSG_MAX_QNUM; ii++) {
342*2c5b6df1Sdv142724 		ASSERT(list_is_empty(&qp->msg_wait_snd[ii]));
343*2c5b6df1Sdv142724 		ASSERT(list_is_empty(&qp->msg_wait_snd_ngt[ii]));
344*2c5b6df1Sdv142724 		list_destroy(&qp->msg_wait_snd[ii]);
345*2c5b6df1Sdv142724 		list_destroy(&qp->msg_wait_snd_ngt[ii]);
346*2c5b6df1Sdv142724 	}
347*2c5b6df1Sdv142724 	ASSERT(list_is_empty(&qp->msg_cpy_block));
348*2c5b6df1Sdv142724 	list_destroy(&qp->msg_cpy_block);
3497c478bd9Sstevel@tonic-gate 	ASSERT(qp->msg_snd_cnt == 0);
3507c478bd9Sstevel@tonic-gate 	ASSERT(qp->msg_cbytes == 0);
3517c478bd9Sstevel@tonic-gate 	list_destroy(&qp->msg_list);
3527c478bd9Sstevel@tonic-gate }
3537c478bd9Sstevel@tonic-gate 
3547c478bd9Sstevel@tonic-gate 
/*
 * msg_hold - take an additional hold on a message by bumping its copy
 * count.  The expansion is fully parenthesized so it behaves as a single
 * expression wherever it is used; it evaluates to the count before the
 * increment.
 */
#define	msg_hold(mp)	((mp)->msg_copycnt++)
3567c478bd9Sstevel@tonic-gate 
3577c478bd9Sstevel@tonic-gate /*
3587c478bd9Sstevel@tonic-gate  * msg_rele - decrement the reference count on the message.  When count
3597c478bd9Sstevel@tonic-gate  * reaches zero, free message header and contents.
3607c478bd9Sstevel@tonic-gate  */
3617c478bd9Sstevel@tonic-gate static void
3627c478bd9Sstevel@tonic-gate msg_rele(struct msg *mp)
3637c478bd9Sstevel@tonic-gate {
3647c478bd9Sstevel@tonic-gate 	ASSERT(mp->msg_copycnt > 0);
3657c478bd9Sstevel@tonic-gate 	if (mp->msg_copycnt-- == 1) {
3667c478bd9Sstevel@tonic-gate 		if (mp->msg_addr)
3677c478bd9Sstevel@tonic-gate 			kmem_free(mp->msg_addr, mp->msg_size);
3687c478bd9Sstevel@tonic-gate 		kmem_free(mp, sizeof (struct msg));
3697c478bd9Sstevel@tonic-gate 	}
3707c478bd9Sstevel@tonic-gate }
3717c478bd9Sstevel@tonic-gate 
3727c478bd9Sstevel@tonic-gate /*
3737c478bd9Sstevel@tonic-gate  * msgunlink - Unlink msg from queue, decrement byte count and wake up anyone
3747c478bd9Sstevel@tonic-gate  * waiting for free bytes on queue.
3757c478bd9Sstevel@tonic-gate  *
3767c478bd9Sstevel@tonic-gate  * Called with queue locked.
3777c478bd9Sstevel@tonic-gate  */
3787c478bd9Sstevel@tonic-gate static void
3797c478bd9Sstevel@tonic-gate msgunlink(kmsqid_t *qp, struct msg *mp)
3807c478bd9Sstevel@tonic-gate {
3817c478bd9Sstevel@tonic-gate 	list_remove(&qp->msg_list, mp);
3827c478bd9Sstevel@tonic-gate 	qp->msg_qnum--;
3837c478bd9Sstevel@tonic-gate 	qp->msg_cbytes -= mp->msg_size;
3847c478bd9Sstevel@tonic-gate 	msg_rele(mp);
3857c478bd9Sstevel@tonic-gate 
3867c478bd9Sstevel@tonic-gate 	/* Wake up waiting writers */
3877c478bd9Sstevel@tonic-gate 	if (qp->msg_snd_cnt)
3887c478bd9Sstevel@tonic-gate 		cv_broadcast(&qp->msg_snd_cv);
3897c478bd9Sstevel@tonic-gate }
3907c478bd9Sstevel@tonic-gate 
3917c478bd9Sstevel@tonic-gate static void
3927c478bd9Sstevel@tonic-gate msg_rmid(kipc_perm_t *perm)
3937c478bd9Sstevel@tonic-gate {
3947c478bd9Sstevel@tonic-gate 	kmsqid_t *qp = (kmsqid_t *)perm;
3957c478bd9Sstevel@tonic-gate 	struct msg *mp;
396b2eb1770Sudpa 	int		ii;
3977c478bd9Sstevel@tonic-gate 
3987c478bd9Sstevel@tonic-gate 
3997c478bd9Sstevel@tonic-gate 	while ((mp = list_head(&qp->msg_list)) != NULL)
4007c478bd9Sstevel@tonic-gate 		msgunlink(qp, mp);
4017c478bd9Sstevel@tonic-gate 	ASSERT(qp->msg_cbytes == 0);
4027c478bd9Sstevel@tonic-gate 
403*2c5b6df1Sdv142724 	/*
404*2c5b6df1Sdv142724 	 * Wake up everyone who is in a wait state of some sort
405*2c5b6df1Sdv142724 	 * for this message queue.
406*2c5b6df1Sdv142724 	 */
407*2c5b6df1Sdv142724 	for (ii = 0; ii <= MSG_MAX_QNUM; ii++) {
408*2c5b6df1Sdv142724 		msg_rcvq_wakeup_all(&qp->msg_wait_snd[ii]);
409*2c5b6df1Sdv142724 		msg_rcvq_wakeup_all(&qp->msg_wait_snd_ngt[ii]);
410b2eb1770Sudpa 	}
411*2c5b6df1Sdv142724 	msg_rcvq_wakeup_all(&qp->msg_cpy_block);
4127c478bd9Sstevel@tonic-gate 	if (qp->msg_snd_cnt)
4137c478bd9Sstevel@tonic-gate 		cv_broadcast(&qp->msg_snd_cv);
4147c478bd9Sstevel@tonic-gate }
4157c478bd9Sstevel@tonic-gate 
4167c478bd9Sstevel@tonic-gate /*
4177c478bd9Sstevel@tonic-gate  * msgctl system call.
4187c478bd9Sstevel@tonic-gate  *
4197c478bd9Sstevel@tonic-gate  * gets q lock (via ipc_lookup), releases before return.
4207c478bd9Sstevel@tonic-gate  * may call users of msg_lock
4217c478bd9Sstevel@tonic-gate  */
4227c478bd9Sstevel@tonic-gate static int
4237c478bd9Sstevel@tonic-gate msgctl(int msgid, int cmd, void *arg)
4247c478bd9Sstevel@tonic-gate {
4257c478bd9Sstevel@tonic-gate 	STRUCT_DECL(msqid_ds, ds);		/* SVR4 queue work area */
4267c478bd9Sstevel@tonic-gate 	kmsqid_t		*qp;		/* ptr to associated q */
427*2c5b6df1Sdv142724 	int			error;
4287c478bd9Sstevel@tonic-gate 	struct	cred		*cr;
4297c478bd9Sstevel@tonic-gate 	model_t	mdl = get_udatamodel();
4307c478bd9Sstevel@tonic-gate 	struct msqid_ds64	ds64;
4317c478bd9Sstevel@tonic-gate 	kmutex_t		*lock;
4327c478bd9Sstevel@tonic-gate 	proc_t			*pp = curproc;
4337c478bd9Sstevel@tonic-gate 
4347c478bd9Sstevel@tonic-gate 	STRUCT_INIT(ds, mdl);
4357c478bd9Sstevel@tonic-gate 	cr = CRED();
4367c478bd9Sstevel@tonic-gate 
4377c478bd9Sstevel@tonic-gate 	/*
4387c478bd9Sstevel@tonic-gate 	 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
4397c478bd9Sstevel@tonic-gate 	 */
4407c478bd9Sstevel@tonic-gate 	switch (cmd) {
4417c478bd9Sstevel@tonic-gate 	case IPC_SET:
4427c478bd9Sstevel@tonic-gate 		if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds)))
4437c478bd9Sstevel@tonic-gate 			return (set_errno(EFAULT));
4447c478bd9Sstevel@tonic-gate 		break;
4457c478bd9Sstevel@tonic-gate 
4467c478bd9Sstevel@tonic-gate 	case IPC_SET64:
4477c478bd9Sstevel@tonic-gate 		if (copyin(arg, &ds64, sizeof (struct msqid_ds64)))
4487c478bd9Sstevel@tonic-gate 			return (set_errno(EFAULT));
4497c478bd9Sstevel@tonic-gate 		break;
4507c478bd9Sstevel@tonic-gate 
4517c478bd9Sstevel@tonic-gate 	case IPC_RMID:
4527c478bd9Sstevel@tonic-gate 		if (error = ipc_rmid(msq_svc, msgid, cr))
4537c478bd9Sstevel@tonic-gate 			return (set_errno(error));
4547c478bd9Sstevel@tonic-gate 		return (0);
4557c478bd9Sstevel@tonic-gate 	}
4567c478bd9Sstevel@tonic-gate 
4577c478bd9Sstevel@tonic-gate 	/*
4587c478bd9Sstevel@tonic-gate 	 * get msqid_ds for this msgid
4597c478bd9Sstevel@tonic-gate 	 */
4607c478bd9Sstevel@tonic-gate 	if ((lock = ipc_lookup(msq_svc, msgid, (kipc_perm_t **)&qp)) == NULL)
4617c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
4627c478bd9Sstevel@tonic-gate 
4637c478bd9Sstevel@tonic-gate 	switch (cmd) {
4647c478bd9Sstevel@tonic-gate 	case IPC_SET:
4657c478bd9Sstevel@tonic-gate 		if (STRUCT_FGET(ds, msg_qbytes) > qp->msg_qbytes &&
4667c478bd9Sstevel@tonic-gate 		    secpolicy_ipc_config(cr) != 0) {
4677c478bd9Sstevel@tonic-gate 			mutex_exit(lock);
4687c478bd9Sstevel@tonic-gate 			return (set_errno(EPERM));
4697c478bd9Sstevel@tonic-gate 		}
4707c478bd9Sstevel@tonic-gate 		if (error = ipcperm_set(msq_svc, cr, &qp->msg_perm,
4717c478bd9Sstevel@tonic-gate 		    &STRUCT_BUF(ds)->msg_perm, mdl)) {
4727c478bd9Sstevel@tonic-gate 			mutex_exit(lock);
4737c478bd9Sstevel@tonic-gate 			return (set_errno(error));
4747c478bd9Sstevel@tonic-gate 		}
4757c478bd9Sstevel@tonic-gate 		qp->msg_qbytes = STRUCT_FGET(ds, msg_qbytes);
4767c478bd9Sstevel@tonic-gate 		qp->msg_ctime = gethrestime_sec();
4777c478bd9Sstevel@tonic-gate 		break;
4787c478bd9Sstevel@tonic-gate 
4797c478bd9Sstevel@tonic-gate 	case IPC_STAT:
4807c478bd9Sstevel@tonic-gate 		if (error = ipcperm_access(&qp->msg_perm, MSG_R, cr)) {
4817c478bd9Sstevel@tonic-gate 			mutex_exit(lock);
4827c478bd9Sstevel@tonic-gate 			return (set_errno(error));
4837c478bd9Sstevel@tonic-gate 		}
4847c478bd9Sstevel@tonic-gate 
485*2c5b6df1Sdv142724 		if (qp->msg_rcv_cnt)
4867c478bd9Sstevel@tonic-gate 			qp->msg_perm.ipc_mode |= MSG_RWAIT;
4877c478bd9Sstevel@tonic-gate 		if (qp->msg_snd_cnt)
4887c478bd9Sstevel@tonic-gate 			qp->msg_perm.ipc_mode |= MSG_WWAIT;
4897c478bd9Sstevel@tonic-gate 		ipcperm_stat(&STRUCT_BUF(ds)->msg_perm, &qp->msg_perm, mdl);
4907c478bd9Sstevel@tonic-gate 		qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT);
4917c478bd9Sstevel@tonic-gate 		STRUCT_FSETP(ds, msg_first, NULL); 	/* kernel addr */
4927c478bd9Sstevel@tonic-gate 		STRUCT_FSETP(ds, msg_last, NULL);
4937c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, msg_cbytes, qp->msg_cbytes);
4947c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, msg_qnum, qp->msg_qnum);
4957c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, msg_qbytes, qp->msg_qbytes);
4967c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, msg_lspid, qp->msg_lspid);
4977c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, msg_lrpid, qp->msg_lrpid);
4987c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, msg_stime, qp->msg_stime);
4997c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, msg_rtime, qp->msg_rtime);
5007c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, msg_ctime, qp->msg_ctime);
5017c478bd9Sstevel@tonic-gate 		break;
5027c478bd9Sstevel@tonic-gate 
5037c478bd9Sstevel@tonic-gate 	case IPC_SET64:
5047c478bd9Sstevel@tonic-gate 		mutex_enter(&pp->p_lock);
5057c478bd9Sstevel@tonic-gate 		if ((ds64.msgx_qbytes > qp->msg_qbytes) &&
5067c478bd9Sstevel@tonic-gate 		    secpolicy_ipc_config(cr) != 0 &&
5077c478bd9Sstevel@tonic-gate 		    rctl_test(rc_process_msgmnb, pp->p_rctls, pp,
5087c478bd9Sstevel@tonic-gate 		    ds64.msgx_qbytes, RCA_SAFE) & RCT_DENY) {
5097c478bd9Sstevel@tonic-gate 			mutex_exit(&pp->p_lock);
5107c478bd9Sstevel@tonic-gate 			mutex_exit(lock);
5117c478bd9Sstevel@tonic-gate 			return (set_errno(EPERM));
5127c478bd9Sstevel@tonic-gate 		}
5137c478bd9Sstevel@tonic-gate 		mutex_exit(&pp->p_lock);
5147c478bd9Sstevel@tonic-gate 		if (error = ipcperm_set64(msq_svc, cr, &qp->msg_perm,
5157c478bd9Sstevel@tonic-gate 		    &ds64.msgx_perm)) {
5167c478bd9Sstevel@tonic-gate 			mutex_exit(lock);
5177c478bd9Sstevel@tonic-gate 			return (set_errno(error));
5187c478bd9Sstevel@tonic-gate 		}
5197c478bd9Sstevel@tonic-gate 		qp->msg_qbytes = ds64.msgx_qbytes;
5207c478bd9Sstevel@tonic-gate 		qp->msg_ctime = gethrestime_sec();
5217c478bd9Sstevel@tonic-gate 		break;
5227c478bd9Sstevel@tonic-gate 
5237c478bd9Sstevel@tonic-gate 	case IPC_STAT64:
524*2c5b6df1Sdv142724 		if (qp->msg_rcv_cnt)
5257c478bd9Sstevel@tonic-gate 			qp->msg_perm.ipc_mode |= MSG_RWAIT;
5267c478bd9Sstevel@tonic-gate 		if (qp->msg_snd_cnt)
5277c478bd9Sstevel@tonic-gate 			qp->msg_perm.ipc_mode |= MSG_WWAIT;
5287c478bd9Sstevel@tonic-gate 		ipcperm_stat64(&ds64.msgx_perm, &qp->msg_perm);
5297c478bd9Sstevel@tonic-gate 		qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT);
5307c478bd9Sstevel@tonic-gate 		ds64.msgx_cbytes = qp->msg_cbytes;
5317c478bd9Sstevel@tonic-gate 		ds64.msgx_qnum = qp->msg_qnum;
5327c478bd9Sstevel@tonic-gate 		ds64.msgx_qbytes = qp->msg_qbytes;
5337c478bd9Sstevel@tonic-gate 		ds64.msgx_lspid = qp->msg_lspid;
5347c478bd9Sstevel@tonic-gate 		ds64.msgx_lrpid = qp->msg_lrpid;
5357c478bd9Sstevel@tonic-gate 		ds64.msgx_stime = qp->msg_stime;
5367c478bd9Sstevel@tonic-gate 		ds64.msgx_rtime = qp->msg_rtime;
5377c478bd9Sstevel@tonic-gate 		ds64.msgx_ctime = qp->msg_ctime;
5387c478bd9Sstevel@tonic-gate 		break;
5397c478bd9Sstevel@tonic-gate 
5407c478bd9Sstevel@tonic-gate 	default:
5417c478bd9Sstevel@tonic-gate 		mutex_exit(lock);
5427c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
5437c478bd9Sstevel@tonic-gate 	}
5447c478bd9Sstevel@tonic-gate 
5457c478bd9Sstevel@tonic-gate 	mutex_exit(lock);
5467c478bd9Sstevel@tonic-gate 
5477c478bd9Sstevel@tonic-gate 	/*
5487c478bd9Sstevel@tonic-gate 	 * Do copyout last (after releasing mutex).
5497c478bd9Sstevel@tonic-gate 	 */
5507c478bd9Sstevel@tonic-gate 	switch (cmd) {
5517c478bd9Sstevel@tonic-gate 	case IPC_STAT:
5527c478bd9Sstevel@tonic-gate 		if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds)))
5537c478bd9Sstevel@tonic-gate 			return (set_errno(EFAULT));
5547c478bd9Sstevel@tonic-gate 		break;
5557c478bd9Sstevel@tonic-gate 
5567c478bd9Sstevel@tonic-gate 	case IPC_STAT64:
5577c478bd9Sstevel@tonic-gate 		if (copyout(&ds64, arg, sizeof (struct msqid_ds64)))
5587c478bd9Sstevel@tonic-gate 			return (set_errno(EFAULT));
5597c478bd9Sstevel@tonic-gate 		break;
5607c478bd9Sstevel@tonic-gate 	}
5617c478bd9Sstevel@tonic-gate 
5627c478bd9Sstevel@tonic-gate 	return (0);
5637c478bd9Sstevel@tonic-gate }
5647c478bd9Sstevel@tonic-gate 
5657c478bd9Sstevel@tonic-gate /*
5667c478bd9Sstevel@tonic-gate  * Remove all message queues associated with a given zone.  Called by
5677c478bd9Sstevel@tonic-gate  * zone_shutdown when the zone is halted.
5687c478bd9Sstevel@tonic-gate  */
5697c478bd9Sstevel@tonic-gate /*ARGSUSED1*/
5707c478bd9Sstevel@tonic-gate static void
5717c478bd9Sstevel@tonic-gate msg_remove_zone(zoneid_t zoneid, void *arg)
5727c478bd9Sstevel@tonic-gate {
5737c478bd9Sstevel@tonic-gate 	ipc_remove_zone(msq_svc, zoneid);
5747c478bd9Sstevel@tonic-gate }
5757c478bd9Sstevel@tonic-gate 
5767c478bd9Sstevel@tonic-gate /*
5777c478bd9Sstevel@tonic-gate  * msgget system call.
5787c478bd9Sstevel@tonic-gate  */
5797c478bd9Sstevel@tonic-gate static int
5807c478bd9Sstevel@tonic-gate msgget(key_t key, int msgflg)
5817c478bd9Sstevel@tonic-gate {
5827c478bd9Sstevel@tonic-gate 	kmsqid_t	*qp;
5837c478bd9Sstevel@tonic-gate 	kmutex_t	*lock;
5847c478bd9Sstevel@tonic-gate 	int		id, error;
585b2eb1770Sudpa 	int		ii;
5867c478bd9Sstevel@tonic-gate 	proc_t		*pp = curproc;
5877c478bd9Sstevel@tonic-gate 
5887c478bd9Sstevel@tonic-gate top:
5897c478bd9Sstevel@tonic-gate 	if (error = ipc_get(msq_svc, key, msgflg, (kipc_perm_t **)&qp, &lock))
5907c478bd9Sstevel@tonic-gate 		return (set_errno(error));
5917c478bd9Sstevel@tonic-gate 
5927c478bd9Sstevel@tonic-gate 	if (IPC_FREE(&qp->msg_perm)) {
5937c478bd9Sstevel@tonic-gate 		mutex_exit(lock);
5947c478bd9Sstevel@tonic-gate 		mutex_exit(&pp->p_lock);
5957c478bd9Sstevel@tonic-gate 
5967c478bd9Sstevel@tonic-gate 		list_create(&qp->msg_list, sizeof (struct msg),
5977c478bd9Sstevel@tonic-gate 		    offsetof(struct msg, msg_node));
5987c478bd9Sstevel@tonic-gate 		qp->msg_qnum = 0;
5997c478bd9Sstevel@tonic-gate 		qp->msg_lspid = qp->msg_lrpid = 0;
6007c478bd9Sstevel@tonic-gate 		qp->msg_stime = qp->msg_rtime = 0;
6017c478bd9Sstevel@tonic-gate 		qp->msg_ctime = gethrestime_sec();
602*2c5b6df1Sdv142724 		qp->msg_ngt_cnt = 0;
603*2c5b6df1Sdv142724 		qp->msg_neg_copy = 0;
604*2c5b6df1Sdv142724 		for (ii = 0; ii <= MSG_MAX_QNUM; ii++) {
605*2c5b6df1Sdv142724 			list_create(&qp->msg_wait_snd[ii],
606*2c5b6df1Sdv142724 			    sizeof (msgq_wakeup_t),
607*2c5b6df1Sdv142724 			    offsetof(msgq_wakeup_t, msgw_list));
608*2c5b6df1Sdv142724 			list_create(&qp->msg_wait_snd_ngt[ii],
609*2c5b6df1Sdv142724 			    sizeof (msgq_wakeup_t),
610*2c5b6df1Sdv142724 			    offsetof(msgq_wakeup_t, msgw_list));
611*2c5b6df1Sdv142724 		}
612*2c5b6df1Sdv142724 		/*
613*2c5b6df1Sdv142724 		 * The proper initialization of msg_lowest_type is to the
614*2c5b6df1Sdv142724 		 * highest possible value.  By doing this we guarantee that
615*2c5b6df1Sdv142724 		 * when the first send happens, the lowest type will be set
616*2c5b6df1Sdv142724 		 * properly.
617*2c5b6df1Sdv142724 		 */
618*2c5b6df1Sdv142724 		qp->msg_lowest_type = -1;
619*2c5b6df1Sdv142724 		list_create(&qp->msg_cpy_block,
620*2c5b6df1Sdv142724 		    sizeof (msgq_wakeup_t),
621*2c5b6df1Sdv142724 		    offsetof(msgq_wakeup_t, msgw_list));
622*2c5b6df1Sdv142724 		qp->msg_fnd_sndr = &msg_fnd_sndr[0];
623*2c5b6df1Sdv142724 		qp->msg_fnd_rdr = &msg_fnd_rdr[0];
624*2c5b6df1Sdv142724 		qp->msg_rcv_cnt = 0;
625b2eb1770Sudpa 		qp->msg_snd_cnt = 0;
6267c478bd9Sstevel@tonic-gate 
6277c478bd9Sstevel@tonic-gate 		if (error = ipc_commit_begin(msq_svc, key, msgflg,
6287c478bd9Sstevel@tonic-gate 		    (kipc_perm_t *)qp)) {
6297c478bd9Sstevel@tonic-gate 			if (error == EAGAIN)
6307c478bd9Sstevel@tonic-gate 				goto top;
6317c478bd9Sstevel@tonic-gate 			return (set_errno(error));
6327c478bd9Sstevel@tonic-gate 		}
6337c478bd9Sstevel@tonic-gate 		qp->msg_qbytes = rctl_enforced_value(rc_process_msgmnb,
6347c478bd9Sstevel@tonic-gate 		    pp->p_rctls, pp);
6357c478bd9Sstevel@tonic-gate 		qp->msg_qmax = rctl_enforced_value(rc_process_msgtql,
6367c478bd9Sstevel@tonic-gate 		    pp->p_rctls, pp);
6377c478bd9Sstevel@tonic-gate 		lock = ipc_commit_end(msq_svc, &qp->msg_perm);
6387c478bd9Sstevel@tonic-gate 	}
6397c478bd9Sstevel@tonic-gate #ifdef C2_AUDIT
6407c478bd9Sstevel@tonic-gate 	if (audit_active)
6417c478bd9Sstevel@tonic-gate 		audit_ipcget(AT_IPC_MSG, (void *)qp);
6427c478bd9Sstevel@tonic-gate #endif
6437c478bd9Sstevel@tonic-gate 	id = qp->msg_perm.ipc_id;
6447c478bd9Sstevel@tonic-gate 	mutex_exit(lock);
6457c478bd9Sstevel@tonic-gate 	return (id);
6467c478bd9Sstevel@tonic-gate }
6477c478bd9Sstevel@tonic-gate 
6487c478bd9Sstevel@tonic-gate static ssize_t
6497c478bd9Sstevel@tonic-gate msgrcv(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, long msgtyp, int msgflg)
6507c478bd9Sstevel@tonic-gate {
6517c478bd9Sstevel@tonic-gate 	struct msg	*smp;	/* ptr to best msg on q */
6527c478bd9Sstevel@tonic-gate 	kmsqid_t	*qp;	/* ptr to associated q */
6537c478bd9Sstevel@tonic-gate 	kmutex_t	*lock;
6547c478bd9Sstevel@tonic-gate 	size_t		xtsz;	/* transfer byte count */
655*2c5b6df1Sdv142724 	int		error = 0;
6567c478bd9Sstevel@tonic-gate 	int		cvres;
657*2c5b6df1Sdv142724 	ulong_t		msg_hash;
658*2c5b6df1Sdv142724 	msgq_wakeup_t	msg_entry;
6597c478bd9Sstevel@tonic-gate 
6607c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(CPU, sys, msg, 1);	/* bump msg send/rcv count */
6617c478bd9Sstevel@tonic-gate 
662*2c5b6df1Sdv142724 	msg_hash = msg_type_hash(msgtyp);
663*2c5b6df1Sdv142724 	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL) {
6647c478bd9Sstevel@tonic-gate 		return ((ssize_t)set_errno(EINVAL));
665*2c5b6df1Sdv142724 	}
6667c478bd9Sstevel@tonic-gate 	ipc_hold(msq_svc, (kipc_perm_t *)qp);
6677c478bd9Sstevel@tonic-gate 
668*2c5b6df1Sdv142724 	if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED())) {
6697c478bd9Sstevel@tonic-gate 		goto msgrcv_out;
670*2c5b6df1Sdv142724 	}
6717c478bd9Sstevel@tonic-gate 
672*2c5b6df1Sdv142724 	/*
673*2c5b6df1Sdv142724 	 * Various information (including the condvar_t) required for the
674*2c5b6df1Sdv142724 	 * process to sleep is provided by it's stack.
675*2c5b6df1Sdv142724 	 */
676*2c5b6df1Sdv142724 	msg_entry.msgw_thrd = curthread;
677*2c5b6df1Sdv142724 	msg_entry.msgw_snd_wake = 0;
678*2c5b6df1Sdv142724 	msg_entry.msgw_type = msgtyp;
6797c478bd9Sstevel@tonic-gate findmsg:
680*2c5b6df1Sdv142724 	smp = msgrcv_lookup(qp, msgtyp);
6817c478bd9Sstevel@tonic-gate 
6827c478bd9Sstevel@tonic-gate 	if (smp) {
6837c478bd9Sstevel@tonic-gate 		/*
684*2c5b6df1Sdv142724 		 * We found a possible message to copy out.
6857c478bd9Sstevel@tonic-gate 		 */
6867c478bd9Sstevel@tonic-gate 		if ((smp->msg_flags & MSG_RCVCOPY) == 0) {
6877c478bd9Sstevel@tonic-gate 			/*
688*2c5b6df1Sdv142724 			 * It is available, attempt to copy it.
6897c478bd9Sstevel@tonic-gate 			 */
690*2c5b6df1Sdv142724 			error = msg_copyout(qp, msgtyp, &lock, &xtsz, msgsz,
691*2c5b6df1Sdv142724 			    smp, msgp, msgflg);
692*2c5b6df1Sdv142724 			/*
693*2c5b6df1Sdv142724 			 * Don't forget to wakeup a sleeper that blocked because
694*2c5b6df1Sdv142724 			 * we were copying things out.
695*2c5b6df1Sdv142724 			 */
696*2c5b6df1Sdv142724 			msg_wakeup_rdr(qp, &qp->msg_fnd_rdr, 0);
697*2c5b6df1Sdv142724 			goto msgrcv_out;
698*2c5b6df1Sdv142724 		}
699*2c5b6df1Sdv142724 		/*
700*2c5b6df1Sdv142724 		 * The selected message is being copied out, so block.  We do
701*2c5b6df1Sdv142724 		 * not need to wake the next person up on the msg_cpy_block list
702*2c5b6df1Sdv142724 		 * due to the fact some one is copying out and they will get
703*2c5b6df1Sdv142724 		 * things moving again once the copy is completed.
704*2c5b6df1Sdv142724 		 */
705*2c5b6df1Sdv142724 		cvres = msg_rcvq_sleep(&qp->msg_cpy_block,
706*2c5b6df1Sdv142724 		    &msg_entry, &lock, qp);
707*2c5b6df1Sdv142724 		error = msgq_check_err(qp, cvres);
708*2c5b6df1Sdv142724 		if (error) {
709*2c5b6df1Sdv142724 			goto msgrcv_out;
710*2c5b6df1Sdv142724 		}
711*2c5b6df1Sdv142724 		goto findmsg;
712*2c5b6df1Sdv142724 	}
713*2c5b6df1Sdv142724 	/*
714*2c5b6df1Sdv142724 	 * There isn't a message to copy out that matches the designated
715*2c5b6df1Sdv142724 	 * criteria.
716*2c5b6df1Sdv142724 	 */
717*2c5b6df1Sdv142724 	if (msgflg & IPC_NOWAIT) {
718*2c5b6df1Sdv142724 		error = ENOMSG;
719*2c5b6df1Sdv142724 		goto msgrcv_out;
720*2c5b6df1Sdv142724 	}
721*2c5b6df1Sdv142724 	msg_wakeup_rdr(qp,  &qp->msg_fnd_rdr, 0);
722*2c5b6df1Sdv142724 
723*2c5b6df1Sdv142724 	/*
724*2c5b6df1Sdv142724 	 * Wait for new message.  We keep the negative and positive types
725*2c5b6df1Sdv142724 	 * separate for performance reasons.
726*2c5b6df1Sdv142724 	 */
727*2c5b6df1Sdv142724 	msg_entry.msgw_snd_wake = 0;
728*2c5b6df1Sdv142724 	if (msgtyp >= 0) {
729*2c5b6df1Sdv142724 		cvres = msg_rcvq_sleep(&qp->msg_wait_snd[msg_hash],
730*2c5b6df1Sdv142724 		    &msg_entry, &lock, qp);
731*2c5b6df1Sdv142724 	} else {
732*2c5b6df1Sdv142724 		qp->msg_ngt_cnt++;
733*2c5b6df1Sdv142724 		cvres = msg_rcvq_sleep(&qp->msg_wait_snd_ngt[msg_hash],
734*2c5b6df1Sdv142724 		    &msg_entry, &lock, qp);
735*2c5b6df1Sdv142724 		qp->msg_ngt_cnt--;
736*2c5b6df1Sdv142724 	}
737*2c5b6df1Sdv142724 
738*2c5b6df1Sdv142724 	if (!(error = msgq_check_err(qp, cvres))) {
739*2c5b6df1Sdv142724 		goto findmsg;
740*2c5b6df1Sdv142724 	}
741*2c5b6df1Sdv142724 
742*2c5b6df1Sdv142724 msgrcv_out:
743*2c5b6df1Sdv142724 	if (error) {
744*2c5b6df1Sdv142724 		msg_wakeup_rdr(qp,  &qp->msg_fnd_rdr, 0);
745*2c5b6df1Sdv142724 		if (msg_entry.msgw_snd_wake) {
746*2c5b6df1Sdv142724 			msg_wakeup_rdr(qp, &qp->msg_fnd_sndr,
747*2c5b6df1Sdv142724 			    msg_entry.msgw_snd_wake);
748*2c5b6df1Sdv142724 		}
749*2c5b6df1Sdv142724 		ipc_rele(msq_svc, (kipc_perm_t *)qp);
750*2c5b6df1Sdv142724 		return ((ssize_t)set_errno(error));
751*2c5b6df1Sdv142724 	}
752*2c5b6df1Sdv142724 	ipc_rele(msq_svc, (kipc_perm_t *)qp);
753*2c5b6df1Sdv142724 	return ((ssize_t)xtsz);
754*2c5b6df1Sdv142724 }
755*2c5b6df1Sdv142724 
756*2c5b6df1Sdv142724 static int
757*2c5b6df1Sdv142724 msgq_check_err(kmsqid_t *qp, int cvres)
758*2c5b6df1Sdv142724 {
759*2c5b6df1Sdv142724 	if (IPC_FREE(&qp->msg_perm)) {
760*2c5b6df1Sdv142724 		return (EIDRM);
761*2c5b6df1Sdv142724 	}
762*2c5b6df1Sdv142724 
763*2c5b6df1Sdv142724 	if (cvres == 0) {
764*2c5b6df1Sdv142724 		return (EINTR);
765*2c5b6df1Sdv142724 	}
766*2c5b6df1Sdv142724 
767*2c5b6df1Sdv142724 	return (0);
768*2c5b6df1Sdv142724 }
769*2c5b6df1Sdv142724 
770*2c5b6df1Sdv142724 static int
771*2c5b6df1Sdv142724 msg_copyout(kmsqid_t *qp, long msgtyp, kmutex_t **lock, size_t *xtsz_ret,
772*2c5b6df1Sdv142724     size_t msgsz, struct msg *smp, struct ipcmsgbuf *msgp, int msgflg)
773*2c5b6df1Sdv142724 {
774*2c5b6df1Sdv142724 	size_t		xtsz;
775*2c5b6df1Sdv142724 	STRUCT_HANDLE(ipcmsgbuf, umsgp);
776*2c5b6df1Sdv142724 	model_t		mdl = get_udatamodel();
777*2c5b6df1Sdv142724 	int		copyerror = 0;
778*2c5b6df1Sdv142724 
779*2c5b6df1Sdv142724 	STRUCT_SET_HANDLE(umsgp, mdl, msgp);
7807c478bd9Sstevel@tonic-gate 	if (msgsz < smp->msg_size) {
7817c478bd9Sstevel@tonic-gate 		if ((msgflg & MSG_NOERROR) == 0) {
782*2c5b6df1Sdv142724 			return (E2BIG);
7837c478bd9Sstevel@tonic-gate 		} else {
7847c478bd9Sstevel@tonic-gate 			xtsz = msgsz;
7857c478bd9Sstevel@tonic-gate 		}
7867c478bd9Sstevel@tonic-gate 	} else {
7877c478bd9Sstevel@tonic-gate 		xtsz = smp->msg_size;
7887c478bd9Sstevel@tonic-gate 	}
789*2c5b6df1Sdv142724 	*xtsz_ret = xtsz;
7907c478bd9Sstevel@tonic-gate 
7917c478bd9Sstevel@tonic-gate 	/*
792*2c5b6df1Sdv142724 	 * To prevent a DOS attack we mark the message as being
793*2c5b6df1Sdv142724 	 * copied out and release mutex.  When the copy is completed
794*2c5b6df1Sdv142724 	 * we need to acquire the mutex and make the appropriate updates.
7957c478bd9Sstevel@tonic-gate 	 */
7967c478bd9Sstevel@tonic-gate 	ASSERT((smp->msg_flags & MSG_RCVCOPY) == 0);
7977c478bd9Sstevel@tonic-gate 	smp->msg_flags |= MSG_RCVCOPY;
7987c478bd9Sstevel@tonic-gate 	msg_hold(smp);
799*2c5b6df1Sdv142724 	if (msgtyp < 0) {
800*2c5b6df1Sdv142724 		ASSERT(qp->msg_neg_copy == 0);
801*2c5b6df1Sdv142724 		qp->msg_neg_copy = 1;
802*2c5b6df1Sdv142724 	}
803*2c5b6df1Sdv142724 	mutex_exit(*lock);
8047c478bd9Sstevel@tonic-gate 
8057c478bd9Sstevel@tonic-gate 	if (mdl == DATAMODEL_NATIVE) {
8067c478bd9Sstevel@tonic-gate 		copyerror = copyout(&smp->msg_type, msgp,
8077c478bd9Sstevel@tonic-gate 		    sizeof (smp->msg_type));
8087c478bd9Sstevel@tonic-gate 	} else {
8097c478bd9Sstevel@tonic-gate 		/*
8107c478bd9Sstevel@tonic-gate 		 * 32-bit callers need an imploded msg type.
8117c478bd9Sstevel@tonic-gate 		 */
8127c478bd9Sstevel@tonic-gate 		int32_t	msg_type32 = smp->msg_type;
8137c478bd9Sstevel@tonic-gate 
8147c478bd9Sstevel@tonic-gate 		copyerror = copyout(&msg_type32, msgp,
8157c478bd9Sstevel@tonic-gate 		    sizeof (msg_type32));
8167c478bd9Sstevel@tonic-gate 	}
8177c478bd9Sstevel@tonic-gate 
818*2c5b6df1Sdv142724 	if (copyerror == 0 && xtsz) {
8197c478bd9Sstevel@tonic-gate 		copyerror = copyout(smp->msg_addr,
8207c478bd9Sstevel@tonic-gate 		    STRUCT_FADDR(umsgp, mtext), xtsz);
821*2c5b6df1Sdv142724 	}
8227c478bd9Sstevel@tonic-gate 
8237c478bd9Sstevel@tonic-gate 	/*
824*2c5b6df1Sdv142724 	 * Reclaim the mutex and make sure the message queue still exists.
8257c478bd9Sstevel@tonic-gate 	 */
826*2c5b6df1Sdv142724 
827*2c5b6df1Sdv142724 	*lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
828*2c5b6df1Sdv142724 	if (msgtyp < 0) {
829*2c5b6df1Sdv142724 		qp->msg_neg_copy = 0;
830*2c5b6df1Sdv142724 	}
8317c478bd9Sstevel@tonic-gate 	ASSERT(smp->msg_flags & MSG_RCVCOPY);
8327c478bd9Sstevel@tonic-gate 	smp->msg_flags &= ~MSG_RCVCOPY;
8337c478bd9Sstevel@tonic-gate 	msg_rele(smp);
8347c478bd9Sstevel@tonic-gate 	if (IPC_FREE(&qp->msg_perm)) {
835*2c5b6df1Sdv142724 		return (EIDRM);
8367c478bd9Sstevel@tonic-gate 	}
8377c478bd9Sstevel@tonic-gate 	if (copyerror) {
838*2c5b6df1Sdv142724 		return (EFAULT);
8397c478bd9Sstevel@tonic-gate 	}
8407c478bd9Sstevel@tonic-gate 	qp->msg_lrpid = ttoproc(curthread)->p_pid;
8417c478bd9Sstevel@tonic-gate 	qp->msg_rtime = gethrestime_sec();
8427c478bd9Sstevel@tonic-gate 	msgunlink(qp, smp);
843*2c5b6df1Sdv142724 	return (0);
8447c478bd9Sstevel@tonic-gate }
8457c478bd9Sstevel@tonic-gate 
846*2c5b6df1Sdv142724 static struct msg *
847*2c5b6df1Sdv142724 msgrcv_lookup(kmsqid_t *qp, long msgtyp)
848*2c5b6df1Sdv142724 {
849*2c5b6df1Sdv142724 	struct msg 		*smp = NULL;
850*2c5b6df1Sdv142724 	int			qp_low;
851*2c5b6df1Sdv142724 	struct msg		*mp;	/* ptr to msg on q */
852*2c5b6df1Sdv142724 	int			low_msgtype;
853*2c5b6df1Sdv142724 	static struct msg	neg_copy_smp;
854*2c5b6df1Sdv142724 
855*2c5b6df1Sdv142724 	mp = list_head(&qp->msg_list);
856*2c5b6df1Sdv142724 	if (msgtyp == 0) {
857*2c5b6df1Sdv142724 		smp = mp;
858*2c5b6df1Sdv142724 	} else {
859*2c5b6df1Sdv142724 		qp_low = qp->msg_lowest_type;
860*2c5b6df1Sdv142724 		if (msgtyp > 0) {
861*2c5b6df1Sdv142724 			/*
862*2c5b6df1Sdv142724 			 * If our lowest possible message type is larger than
863*2c5b6df1Sdv142724 			 * the message type desired, then we know there is
864*2c5b6df1Sdv142724 			 * no entry present.
865*2c5b6df1Sdv142724 			 */
866*2c5b6df1Sdv142724 			if (qp_low > msgtyp) {
867*2c5b6df1Sdv142724 				return (NULL);
868*2c5b6df1Sdv142724 			}
869*2c5b6df1Sdv142724 
870*2c5b6df1Sdv142724 			for (; mp; mp = list_next(&qp->msg_list, mp)) {
871*2c5b6df1Sdv142724 				if (msgtyp == mp->msg_type) {
872*2c5b6df1Sdv142724 					smp = mp;
873*2c5b6df1Sdv142724 					break;
874*2c5b6df1Sdv142724 				}
875*2c5b6df1Sdv142724 			}
8767c478bd9Sstevel@tonic-gate 		} else {
8777c478bd9Sstevel@tonic-gate 			/*
878*2c5b6df1Sdv142724 			 * We have kept track of the lowest possible message
879*2c5b6df1Sdv142724 			 * type on the send queue.  This allows us to terminate
880*2c5b6df1Sdv142724 			 * the search early if we find a message type of that
881*2c5b6df1Sdv142724 			 * type.  Note, the lowest type may not be the actual
882*2c5b6df1Sdv142724 			 * lowest value in the system, it is only guaranteed
883*2c5b6df1Sdv142724 			 * that there isn't a value lower than that.
8847c478bd9Sstevel@tonic-gate 			 */
885*2c5b6df1Sdv142724 			low_msgtype = -msgtyp;
886*2c5b6df1Sdv142724 			if (low_msgtype++ < qp_low) {
887*2c5b6df1Sdv142724 				return (NULL);
888*2c5b6df1Sdv142724 			}
889*2c5b6df1Sdv142724 			if (qp->msg_neg_copy) {
890*2c5b6df1Sdv142724 				neg_copy_smp.msg_flags = MSG_RCVCOPY;
891*2c5b6df1Sdv142724 				return (&neg_copy_smp);
892*2c5b6df1Sdv142724 			}
893*2c5b6df1Sdv142724 			for (; mp; mp = list_next(&qp->msg_list, mp)) {
894*2c5b6df1Sdv142724 				if (mp->msg_type < low_msgtype) {
895*2c5b6df1Sdv142724 					smp = mp;
896*2c5b6df1Sdv142724 					low_msgtype = mp->msg_type;
897*2c5b6df1Sdv142724 					if (low_msgtype == qp_low) {
898*2c5b6df1Sdv142724 						break;
8997c478bd9Sstevel@tonic-gate 					}
9007c478bd9Sstevel@tonic-gate 				}
9017c478bd9Sstevel@tonic-gate 			}
902*2c5b6df1Sdv142724 			if (smp) {
903*2c5b6df1Sdv142724 				/*
904*2c5b6df1Sdv142724 				 * Update the lowest message type.
905*2c5b6df1Sdv142724 				 */
906*2c5b6df1Sdv142724 				qp->msg_lowest_type = smp->msg_type;
9077c478bd9Sstevel@tonic-gate 			}
908*2c5b6df1Sdv142724 		}
909*2c5b6df1Sdv142724 	}
910*2c5b6df1Sdv142724 	return (smp);
9117c478bd9Sstevel@tonic-gate }
9127c478bd9Sstevel@tonic-gate 
9137c478bd9Sstevel@tonic-gate /*
9147c478bd9Sstevel@tonic-gate  * msgids system call.
9157c478bd9Sstevel@tonic-gate  */
9167c478bd9Sstevel@tonic-gate static int
9177c478bd9Sstevel@tonic-gate msgids(int *buf, uint_t nids, uint_t *pnids)
9187c478bd9Sstevel@tonic-gate {
9197c478bd9Sstevel@tonic-gate 	int error;
9207c478bd9Sstevel@tonic-gate 
9217c478bd9Sstevel@tonic-gate 	if (error = ipc_ids(msq_svc, buf, nids, pnids))
9227c478bd9Sstevel@tonic-gate 		return (set_errno(error));
9237c478bd9Sstevel@tonic-gate 
9247c478bd9Sstevel@tonic-gate 	return (0);
9257c478bd9Sstevel@tonic-gate }
9267c478bd9Sstevel@tonic-gate 
9277c478bd9Sstevel@tonic-gate #define	RND(x)		roundup((x), sizeof (size_t))
9287c478bd9Sstevel@tonic-gate #define	RND32(x)	roundup((x), sizeof (size32_t))
9297c478bd9Sstevel@tonic-gate 
9307c478bd9Sstevel@tonic-gate /*
9317c478bd9Sstevel@tonic-gate  * msgsnap system call.
9327c478bd9Sstevel@tonic-gate  */
9337c478bd9Sstevel@tonic-gate static int
9347c478bd9Sstevel@tonic-gate msgsnap(int msqid, caddr_t buf, size_t bufsz, long msgtyp)
9357c478bd9Sstevel@tonic-gate {
9367c478bd9Sstevel@tonic-gate 	struct msg	*mp;	/* ptr to msg on q */
9377c478bd9Sstevel@tonic-gate 	kmsqid_t	*qp;	/* ptr to associated q */
9387c478bd9Sstevel@tonic-gate 	kmutex_t	*lock;
9397c478bd9Sstevel@tonic-gate 	size_t		size;
9407c478bd9Sstevel@tonic-gate 	size_t		nmsg;
9417c478bd9Sstevel@tonic-gate 	struct msg	**snaplist;
9427c478bd9Sstevel@tonic-gate 	int		error, i;
9437c478bd9Sstevel@tonic-gate 	model_t		mdl = get_udatamodel();
9447c478bd9Sstevel@tonic-gate 	STRUCT_DECL(msgsnap_head, head);
9457c478bd9Sstevel@tonic-gate 	STRUCT_DECL(msgsnap_mhead, mhead);
9467c478bd9Sstevel@tonic-gate 
9477c478bd9Sstevel@tonic-gate 	STRUCT_INIT(head, mdl);
9487c478bd9Sstevel@tonic-gate 	STRUCT_INIT(mhead, mdl);
9497c478bd9Sstevel@tonic-gate 
9507c478bd9Sstevel@tonic-gate 	if (bufsz < STRUCT_SIZE(head))
9517c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
9527c478bd9Sstevel@tonic-gate 
9537c478bd9Sstevel@tonic-gate 	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL)
9547c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
9557c478bd9Sstevel@tonic-gate 
9567c478bd9Sstevel@tonic-gate 	if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED())) {
9577c478bd9Sstevel@tonic-gate 		mutex_exit(lock);
9587c478bd9Sstevel@tonic-gate 		return (set_errno(error));
9597c478bd9Sstevel@tonic-gate 	}
9607c478bd9Sstevel@tonic-gate 	ipc_hold(msq_svc, (kipc_perm_t *)qp);
9617c478bd9Sstevel@tonic-gate 
9627c478bd9Sstevel@tonic-gate 	/*
9637c478bd9Sstevel@tonic-gate 	 * First compute the required buffer size and
9647c478bd9Sstevel@tonic-gate 	 * the number of messages on the queue.
9657c478bd9Sstevel@tonic-gate 	 */
9667c478bd9Sstevel@tonic-gate 	size = nmsg = 0;
9677c478bd9Sstevel@tonic-gate 	for (mp = list_head(&qp->msg_list); mp;
9687c478bd9Sstevel@tonic-gate 	    mp = list_next(&qp->msg_list, mp)) {
9697c478bd9Sstevel@tonic-gate 		if (msgtyp == 0 ||
9707c478bd9Sstevel@tonic-gate 		    (msgtyp > 0 && msgtyp == mp->msg_type) ||
9717c478bd9Sstevel@tonic-gate 		    (msgtyp < 0 && mp->msg_type <= -msgtyp)) {
9727c478bd9Sstevel@tonic-gate 			nmsg++;
9737c478bd9Sstevel@tonic-gate 			if (mdl == DATAMODEL_NATIVE)
9747c478bd9Sstevel@tonic-gate 				size += RND(mp->msg_size);
9757c478bd9Sstevel@tonic-gate 			else
9767c478bd9Sstevel@tonic-gate 				size += RND32(mp->msg_size);
9777c478bd9Sstevel@tonic-gate 		}
9787c478bd9Sstevel@tonic-gate 	}
9797c478bd9Sstevel@tonic-gate 
9807c478bd9Sstevel@tonic-gate 	size += STRUCT_SIZE(head) + nmsg * STRUCT_SIZE(mhead);
9817c478bd9Sstevel@tonic-gate 	if (size > bufsz)
9827c478bd9Sstevel@tonic-gate 		nmsg = 0;
9837c478bd9Sstevel@tonic-gate 
9847c478bd9Sstevel@tonic-gate 	if (nmsg > 0) {
9857c478bd9Sstevel@tonic-gate 		/*
9867c478bd9Sstevel@tonic-gate 		 * Mark the messages as being copied.
9877c478bd9Sstevel@tonic-gate 		 */
9887c478bd9Sstevel@tonic-gate 		snaplist = (struct msg **)kmem_alloc(nmsg *
9897c478bd9Sstevel@tonic-gate 		    sizeof (struct msg *), KM_SLEEP);
9907c478bd9Sstevel@tonic-gate 		i = 0;
9917c478bd9Sstevel@tonic-gate 		for (mp = list_head(&qp->msg_list); mp;
9927c478bd9Sstevel@tonic-gate 		    mp = list_next(&qp->msg_list, mp)) {
9937c478bd9Sstevel@tonic-gate 			if (msgtyp == 0 ||
9947c478bd9Sstevel@tonic-gate 			    (msgtyp > 0 && msgtyp == mp->msg_type) ||
9957c478bd9Sstevel@tonic-gate 			    (msgtyp < 0 && mp->msg_type <= -msgtyp)) {
9967c478bd9Sstevel@tonic-gate 				msg_hold(mp);
9977c478bd9Sstevel@tonic-gate 				snaplist[i] = mp;
9987c478bd9Sstevel@tonic-gate 				i++;
9997c478bd9Sstevel@tonic-gate 			}
10007c478bd9Sstevel@tonic-gate 		}
10017c478bd9Sstevel@tonic-gate 	}
10027c478bd9Sstevel@tonic-gate 	mutex_exit(lock);
10037c478bd9Sstevel@tonic-gate 
10047c478bd9Sstevel@tonic-gate 	/*
10057c478bd9Sstevel@tonic-gate 	 * Copy out the buffer header.
10067c478bd9Sstevel@tonic-gate 	 */
10077c478bd9Sstevel@tonic-gate 	STRUCT_FSET(head, msgsnap_size, size);
10087c478bd9Sstevel@tonic-gate 	STRUCT_FSET(head, msgsnap_nmsg, nmsg);
10097c478bd9Sstevel@tonic-gate 	if (copyout(STRUCT_BUF(head), buf, STRUCT_SIZE(head)))
10107c478bd9Sstevel@tonic-gate 		error = EFAULT;
10117c478bd9Sstevel@tonic-gate 
10127c478bd9Sstevel@tonic-gate 	buf += STRUCT_SIZE(head);
10137c478bd9Sstevel@tonic-gate 
10147c478bd9Sstevel@tonic-gate 	/*
10157c478bd9Sstevel@tonic-gate 	 * Now copy out the messages one by one.
10167c478bd9Sstevel@tonic-gate 	 */
10177c478bd9Sstevel@tonic-gate 	for (i = 0; i < nmsg; i++) {
10187c478bd9Sstevel@tonic-gate 		mp = snaplist[i];
10197c478bd9Sstevel@tonic-gate 		if (error == 0) {
10207c478bd9Sstevel@tonic-gate 			STRUCT_FSET(mhead, msgsnap_mlen, mp->msg_size);
10217c478bd9Sstevel@tonic-gate 			STRUCT_FSET(mhead, msgsnap_mtype, mp->msg_type);
10227c478bd9Sstevel@tonic-gate 			if (copyout(STRUCT_BUF(mhead), buf, STRUCT_SIZE(mhead)))
10237c478bd9Sstevel@tonic-gate 				error = EFAULT;
10247c478bd9Sstevel@tonic-gate 			buf += STRUCT_SIZE(mhead);
10257c478bd9Sstevel@tonic-gate 
10267c478bd9Sstevel@tonic-gate 			if (error == 0 &&
10277c478bd9Sstevel@tonic-gate 			    mp->msg_size != 0 &&
10287c478bd9Sstevel@tonic-gate 			    copyout(mp->msg_addr, buf, mp->msg_size))
10297c478bd9Sstevel@tonic-gate 				error = EFAULT;
10307c478bd9Sstevel@tonic-gate 			if (mdl == DATAMODEL_NATIVE)
10317c478bd9Sstevel@tonic-gate 				buf += RND(mp->msg_size);
10327c478bd9Sstevel@tonic-gate 			else
10337c478bd9Sstevel@tonic-gate 				buf += RND32(mp->msg_size);
10347c478bd9Sstevel@tonic-gate 		}
10357c478bd9Sstevel@tonic-gate 		lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
10367c478bd9Sstevel@tonic-gate 		msg_rele(mp);
10377c478bd9Sstevel@tonic-gate 		/* Check for msg q deleted or reallocated */
10387c478bd9Sstevel@tonic-gate 		if (IPC_FREE(&qp->msg_perm))
10397c478bd9Sstevel@tonic-gate 			error = EIDRM;
10407c478bd9Sstevel@tonic-gate 		mutex_exit(lock);
10417c478bd9Sstevel@tonic-gate 	}
10427c478bd9Sstevel@tonic-gate 
10437c478bd9Sstevel@tonic-gate 	(void) ipc_lock(msq_svc, qp->msg_perm.ipc_id);
10447c478bd9Sstevel@tonic-gate 	ipc_rele(msq_svc, (kipc_perm_t *)qp);
10457c478bd9Sstevel@tonic-gate 
10467c478bd9Sstevel@tonic-gate 	if (nmsg > 0)
10477c478bd9Sstevel@tonic-gate 		kmem_free(snaplist, nmsg * sizeof (struct msg *));
10487c478bd9Sstevel@tonic-gate 
10497c478bd9Sstevel@tonic-gate 	if (error)
10507c478bd9Sstevel@tonic-gate 		return (set_errno(error));
10517c478bd9Sstevel@tonic-gate 	return (0);
10527c478bd9Sstevel@tonic-gate }
10537c478bd9Sstevel@tonic-gate 
1054e50383f4Sdv142724 #define	MSG_PREALLOC_LIMIT 8192
1055e50383f4Sdv142724 
10567c478bd9Sstevel@tonic-gate /*
10577c478bd9Sstevel@tonic-gate  * msgsnd system call.
10587c478bd9Sstevel@tonic-gate  */
10597c478bd9Sstevel@tonic-gate static int
10607c478bd9Sstevel@tonic-gate msgsnd(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, int msgflg)
10617c478bd9Sstevel@tonic-gate {
10627c478bd9Sstevel@tonic-gate 	kmsqid_t	*qp;
1063e50383f4Sdv142724 	kmutex_t	*lock = NULL;
10647c478bd9Sstevel@tonic-gate 	struct msg	*mp = NULL;
10657c478bd9Sstevel@tonic-gate 	long		type;
10667c478bd9Sstevel@tonic-gate 	int		error = 0;
10677c478bd9Sstevel@tonic-gate 	model_t		mdl = get_udatamodel();
10687c478bd9Sstevel@tonic-gate 	STRUCT_HANDLE(ipcmsgbuf, umsgp);
10697c478bd9Sstevel@tonic-gate 
10707c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(CPU, sys, msg, 1);	/* bump msg send/rcv count */
10717c478bd9Sstevel@tonic-gate 	STRUCT_SET_HANDLE(umsgp, mdl, msgp);
10727c478bd9Sstevel@tonic-gate 
10737c478bd9Sstevel@tonic-gate 	if (mdl == DATAMODEL_NATIVE) {
10747c478bd9Sstevel@tonic-gate 		if (copyin(msgp, &type, sizeof (type)))
10757c478bd9Sstevel@tonic-gate 			return (set_errno(EFAULT));
10767c478bd9Sstevel@tonic-gate 	} else {
10777c478bd9Sstevel@tonic-gate 		int32_t	type32;
10787c478bd9Sstevel@tonic-gate 		if (copyin(msgp, &type32, sizeof (type32)))
10797c478bd9Sstevel@tonic-gate 			return (set_errno(EFAULT));
10807c478bd9Sstevel@tonic-gate 		type = type32;
10817c478bd9Sstevel@tonic-gate 	}
10827c478bd9Sstevel@tonic-gate 
10837c478bd9Sstevel@tonic-gate 	if (type < 1)
10847c478bd9Sstevel@tonic-gate 		return (set_errno(EINVAL));
10857c478bd9Sstevel@tonic-gate 
1086e50383f4Sdv142724 	/*
1087e50383f4Sdv142724 	 * We want the value here large enough that most of the
1088e50383f4Sdv142724 	 * the message operations will use the "lockless" path,
1089e50383f4Sdv142724 	 * but small enough that a user can not reserve large
1090e50383f4Sdv142724 	 * chunks of kernel memory unless they have a valid
1091e50383f4Sdv142724 	 * reason to.
1092e50383f4Sdv142724 	 */
1093e50383f4Sdv142724 	if (msgsz <= MSG_PREALLOC_LIMIT) {
1094e50383f4Sdv142724 		/*
1095e50383f4Sdv142724 		 * We are small enough that we can afford to do the
1096e50383f4Sdv142724 		 * allocation now.  This saves dropping the lock
1097e50383f4Sdv142724 		 * and then reacquiring the lock.
1098e50383f4Sdv142724 		 */
1099e50383f4Sdv142724 		mp = kmem_zalloc(sizeof (struct msg), KM_SLEEP);
1100e50383f4Sdv142724 		mp->msg_copycnt = 1;
1101e50383f4Sdv142724 		mp->msg_size = msgsz;
1102e50383f4Sdv142724 		if (msgsz) {
1103e50383f4Sdv142724 			mp->msg_addr = kmem_alloc(msgsz, KM_SLEEP);
1104e50383f4Sdv142724 			if (copyin(STRUCT_FADDR(umsgp, mtext),
1105e50383f4Sdv142724 			    mp->msg_addr, msgsz) == -1) {
1106e50383f4Sdv142724 				error = EFAULT;
1107e50383f4Sdv142724 				goto msgsnd_out;
1108e50383f4Sdv142724 			}
1109e50383f4Sdv142724 		}
1110e50383f4Sdv142724 	}
1111e50383f4Sdv142724 
1112e50383f4Sdv142724 	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL) {
1113e50383f4Sdv142724 		error = EINVAL;
1114e50383f4Sdv142724 		goto msgsnd_out;
1115e50383f4Sdv142724 	}
1116e50383f4Sdv142724 
11177c478bd9Sstevel@tonic-gate 	ipc_hold(msq_svc, (kipc_perm_t *)qp);
11187c478bd9Sstevel@tonic-gate 
11197c478bd9Sstevel@tonic-gate 	if (msgsz > qp->msg_qbytes) {
11207c478bd9Sstevel@tonic-gate 		error = EINVAL;
11217c478bd9Sstevel@tonic-gate 		goto msgsnd_out;
11227c478bd9Sstevel@tonic-gate 	}
11237c478bd9Sstevel@tonic-gate 
11247c478bd9Sstevel@tonic-gate 	if (error = ipcperm_access(&qp->msg_perm, MSG_W, CRED()))
11257c478bd9Sstevel@tonic-gate 		goto msgsnd_out;
11267c478bd9Sstevel@tonic-gate 
11277c478bd9Sstevel@tonic-gate top:
11287c478bd9Sstevel@tonic-gate 	/*
11297c478bd9Sstevel@tonic-gate 	 * Allocate space on q, message header, & buffer space.
11307c478bd9Sstevel@tonic-gate 	 */
11317c478bd9Sstevel@tonic-gate 	ASSERT(qp->msg_qnum <= qp->msg_qmax);
11327c478bd9Sstevel@tonic-gate 	while ((msgsz > qp->msg_qbytes - qp->msg_cbytes) ||
11337c478bd9Sstevel@tonic-gate 	    (qp->msg_qnum == qp->msg_qmax)) {
11347c478bd9Sstevel@tonic-gate 		int cvres;
11357c478bd9Sstevel@tonic-gate 
11367c478bd9Sstevel@tonic-gate 		if (msgflg & IPC_NOWAIT) {
11377c478bd9Sstevel@tonic-gate 			error = EAGAIN;
11387c478bd9Sstevel@tonic-gate 			goto msgsnd_out;
11397c478bd9Sstevel@tonic-gate 		}
11407c478bd9Sstevel@tonic-gate 
11417c478bd9Sstevel@tonic-gate 		qp->msg_snd_cnt++;
11427c478bd9Sstevel@tonic-gate 		cvres = cv_wait_sig(&qp->msg_snd_cv, lock);
11437c478bd9Sstevel@tonic-gate 		lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, lock);
11447c478bd9Sstevel@tonic-gate 		qp->msg_snd_cnt--;
11457c478bd9Sstevel@tonic-gate 
1146*2c5b6df1Sdv142724 		if (error = msgq_check_err(qp, cvres)) {
11477c478bd9Sstevel@tonic-gate 			goto msgsnd_out;
11487c478bd9Sstevel@tonic-gate 		}
11497c478bd9Sstevel@tonic-gate 	}
11507c478bd9Sstevel@tonic-gate 
11517c478bd9Sstevel@tonic-gate 	if (mp == NULL) {
11527c478bd9Sstevel@tonic-gate 		int failure;
11537c478bd9Sstevel@tonic-gate 
11547c478bd9Sstevel@tonic-gate 		mutex_exit(lock);
1155e50383f4Sdv142724 		ASSERT(msgsz > 0);
11567c478bd9Sstevel@tonic-gate 		mp = kmem_zalloc(sizeof (struct msg), KM_SLEEP);
1157e50383f4Sdv142724 		mp->msg_addr = kmem_alloc(msgsz, KM_SLEEP);
11587c478bd9Sstevel@tonic-gate 		mp->msg_size = msgsz;
11597c478bd9Sstevel@tonic-gate 		mp->msg_copycnt = 1;
11607c478bd9Sstevel@tonic-gate 
1161e50383f4Sdv142724 		failure = (copyin(STRUCT_FADDR(umsgp, mtext),
11627c478bd9Sstevel@tonic-gate 		    mp->msg_addr, msgsz) == -1);
11637c478bd9Sstevel@tonic-gate 		lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
11647c478bd9Sstevel@tonic-gate 		if (IPC_FREE(&qp->msg_perm)) {
11657c478bd9Sstevel@tonic-gate 			error = EIDRM;
11667c478bd9Sstevel@tonic-gate 			goto msgsnd_out;
11677c478bd9Sstevel@tonic-gate 		}
11687c478bd9Sstevel@tonic-gate 		if (failure) {
11697c478bd9Sstevel@tonic-gate 			error = EFAULT;
11707c478bd9Sstevel@tonic-gate 			goto msgsnd_out;
11717c478bd9Sstevel@tonic-gate 		}
11727c478bd9Sstevel@tonic-gate 		goto top;
11737c478bd9Sstevel@tonic-gate 	}
11747c478bd9Sstevel@tonic-gate 
11757c478bd9Sstevel@tonic-gate 	/*
11767c478bd9Sstevel@tonic-gate 	 * Everything is available, put msg on q.
11777c478bd9Sstevel@tonic-gate 	 */
11787c478bd9Sstevel@tonic-gate 	qp->msg_qnum++;
11797c478bd9Sstevel@tonic-gate 	qp->msg_cbytes += msgsz;
11807c478bd9Sstevel@tonic-gate 	qp->msg_lspid = curproc->p_pid;
11817c478bd9Sstevel@tonic-gate 	qp->msg_stime = gethrestime_sec();
11827c478bd9Sstevel@tonic-gate 	mp->msg_type = type;
1183*2c5b6df1Sdv142724 	if (qp->msg_lowest_type > type)
1184*2c5b6df1Sdv142724 		qp->msg_lowest_type = type;
11857c478bd9Sstevel@tonic-gate 	list_insert_tail(&qp->msg_list, mp);
1186b2eb1770Sudpa 	/*
1187*2c5b6df1Sdv142724 	 * Get the proper receiver going.
1188b2eb1770Sudpa 	 */
1189*2c5b6df1Sdv142724 	msg_wakeup_rdr(qp, &qp->msg_fnd_sndr, type);
11907c478bd9Sstevel@tonic-gate 
11917c478bd9Sstevel@tonic-gate msgsnd_out:
1192e50383f4Sdv142724 	if (lock)
11937c478bd9Sstevel@tonic-gate 		ipc_rele(msq_svc, (kipc_perm_t *)qp);	/* drops lock */
11947c478bd9Sstevel@tonic-gate 
11957c478bd9Sstevel@tonic-gate 	if (error) {
11967c478bd9Sstevel@tonic-gate 		if (mp)
11977c478bd9Sstevel@tonic-gate 			msg_rele(mp);
11987c478bd9Sstevel@tonic-gate 		return (set_errno(error));
11997c478bd9Sstevel@tonic-gate 	}
12007c478bd9Sstevel@tonic-gate 
12017c478bd9Sstevel@tonic-gate 	return (0);
12027c478bd9Sstevel@tonic-gate }
12037c478bd9Sstevel@tonic-gate 
1204*2c5b6df1Sdv142724 static void
1205*2c5b6df1Sdv142724 msg_wakeup_rdr(kmsqid_t *qp, msg_select_t **flist, long type)
1206*2c5b6df1Sdv142724 {
1207*2c5b6df1Sdv142724 	msg_select_t	*walker = *flist;
1208*2c5b6df1Sdv142724 	msgq_wakeup_t	*wakeup;
1209*2c5b6df1Sdv142724 	ulong_t		msg_hash;
1210*2c5b6df1Sdv142724 
1211*2c5b6df1Sdv142724 	msg_hash = msg_type_hash(type);
1212*2c5b6df1Sdv142724 
1213*2c5b6df1Sdv142724 	do {
1214*2c5b6df1Sdv142724 		wakeup = walker->selection(qp, msg_hash, type);
1215*2c5b6df1Sdv142724 		walker = walker->next_selection;
1216*2c5b6df1Sdv142724 	} while (!wakeup && walker != *flist);
1217*2c5b6df1Sdv142724 
1218*2c5b6df1Sdv142724 	*flist = (*flist)->next_selection;
1219*2c5b6df1Sdv142724 	if (wakeup) {
1220*2c5b6df1Sdv142724 		if (type) {
1221*2c5b6df1Sdv142724 			wakeup->msgw_snd_wake = type;
1222*2c5b6df1Sdv142724 		}
1223*2c5b6df1Sdv142724 		cv_signal(&wakeup->msgw_wake_cv);
1224*2c5b6df1Sdv142724 	}
1225*2c5b6df1Sdv142724 }
1226*2c5b6df1Sdv142724 
1227*2c5b6df1Sdv142724 static ulong_t
1228*2c5b6df1Sdv142724 msg_type_hash(long msg_type)
1229*2c5b6df1Sdv142724 {
1230*2c5b6df1Sdv142724 	long	temp;
1231*2c5b6df1Sdv142724 	ulong_t	hash;
1232*2c5b6df1Sdv142724 
1233*2c5b6df1Sdv142724 	if (msg_type < 0) {
1234*2c5b6df1Sdv142724 		/*
1235*2c5b6df1Sdv142724 		 * Negative message types are hashed over an
1236*2c5b6df1Sdv142724 		 * interval.  Any message type that hashes
1237*2c5b6df1Sdv142724 		 * beyond MSG_MAX_QNUM is automatically placed
1238*2c5b6df1Sdv142724 		 * in the last bucket.
1239*2c5b6df1Sdv142724 		 */
1240*2c5b6df1Sdv142724 		temp = -msg_type;
1241*2c5b6df1Sdv142724 		hash = temp / MSG_NEG_INTERVAL;
1242*2c5b6df1Sdv142724 		if (hash > MSG_MAX_QNUM) {
1243*2c5b6df1Sdv142724 			hash = MSG_MAX_QNUM;
1244*2c5b6df1Sdv142724 		}
1245*2c5b6df1Sdv142724 		return (hash);
1246*2c5b6df1Sdv142724 	}
1247*2c5b6df1Sdv142724 
1248*2c5b6df1Sdv142724 	/*
1249*2c5b6df1Sdv142724 	 * 0 or positive message type.  The first bucket is reserved for
1250*2c5b6df1Sdv142724 	 * message receivers of type 0, the other buckets we hash into.
1251*2c5b6df1Sdv142724 	 */
1252*2c5b6df1Sdv142724 	if (msg_type) {
1253*2c5b6df1Sdv142724 		return (1 + (msg_type % (MSG_MAX_QNUM)));
1254*2c5b6df1Sdv142724 	}
1255*2c5b6df1Sdv142724 	return (0);
1256*2c5b6df1Sdv142724 }
1257*2c5b6df1Sdv142724 
/*
 * Selection routines that do not look at the message type.  One checks for
 * a receiver of type 0 blocked waiting for a message (msg_wait_snd[0]);
 * the other checks the msg_cpy_block list.  Each simply returns the first
 * waiter on its list.
 */
1262*2c5b6df1Sdv142724 
1263*2c5b6df1Sdv142724 static msgq_wakeup_t *
1264*2c5b6df1Sdv142724 /* LINTED */
1265*2c5b6df1Sdv142724 msg_fnd_any_snd(kmsqid_t *qp, int msg_hash, long type)
1266*2c5b6df1Sdv142724 {
1267*2c5b6df1Sdv142724 	return (list_head(&qp->msg_wait_snd[0]));
1268*2c5b6df1Sdv142724 }
1269*2c5b6df1Sdv142724 
1270*2c5b6df1Sdv142724 static msgq_wakeup_t *
1271*2c5b6df1Sdv142724 /* LINTED */
1272*2c5b6df1Sdv142724 msg_fnd_any_rdr(kmsqid_t *qp, int msg_hash, long type)
1273*2c5b6df1Sdv142724 {
1274*2c5b6df1Sdv142724 	return (list_head(&qp->msg_cpy_block));
1275*2c5b6df1Sdv142724 }
1276*2c5b6df1Sdv142724 
1277*2c5b6df1Sdv142724 static msgq_wakeup_t *
1278*2c5b6df1Sdv142724 msg_fnd_spc_snd(kmsqid_t *qp, int msg_hash, long type)
1279*2c5b6df1Sdv142724 {
1280*2c5b6df1Sdv142724 	msgq_wakeup_t	*walker;
1281*2c5b6df1Sdv142724 
1282*2c5b6df1Sdv142724 	walker = list_head(&qp->msg_wait_snd[msg_hash]);
1283*2c5b6df1Sdv142724 
1284*2c5b6df1Sdv142724 	while (walker && walker->msgw_type != type &&
1285*2c5b6df1Sdv142724 	    (walker = list_next(&qp->msg_wait_snd[msg_hash], walker)));
1286*2c5b6df1Sdv142724 	return (walker);
1287*2c5b6df1Sdv142724 }
1288*2c5b6df1Sdv142724 
1289*2c5b6df1Sdv142724 static msgq_wakeup_t *
1290*2c5b6df1Sdv142724 /* LINTED */
1291*2c5b6df1Sdv142724 msg_fnd_neg_snd(kmsqid_t *qp, int msg_hash, long type)
1292*2c5b6df1Sdv142724 {
1293*2c5b6df1Sdv142724 	msgq_wakeup_t	*qptr;
1294*2c5b6df1Sdv142724 	int		count;
1295*2c5b6df1Sdv142724 	int		check_index;
1296*2c5b6df1Sdv142724 	int		neg_index;
1297*2c5b6df1Sdv142724 	int		nbuckets;
1298*2c5b6df1Sdv142724 
1299*2c5b6df1Sdv142724 	if (!qp->msg_ngt_cnt) {
1300*2c5b6df1Sdv142724 		return (NULL);
1301*2c5b6df1Sdv142724 	}
1302*2c5b6df1Sdv142724 	neg_index = msg_type_hash(-type);
1303*2c5b6df1Sdv142724 
1304*2c5b6df1Sdv142724 	/*
1305*2c5b6df1Sdv142724 	 * Check for a match among the negative type queues.  Any buckets
1306*2c5b6df1Sdv142724 	 * at neg_index or larger can match the type.  Use the last send
1307*2c5b6df1Sdv142724 	 * time to randomize the starting bucket to prevent starvation.
1308*2c5b6df1Sdv142724 	 * Search all buckets from neg_index to MSG_MAX_QNUM, starting
1309*2c5b6df1Sdv142724 	 * from the random starting point, and wrapping around after
1310*2c5b6df1Sdv142724 	 * MSG_MAX_QNUM.
1311*2c5b6df1Sdv142724 	 */
1312*2c5b6df1Sdv142724 
1313*2c5b6df1Sdv142724 	nbuckets = MSG_MAX_QNUM - neg_index + 1;
1314*2c5b6df1Sdv142724 	check_index = neg_index + (qp->msg_stime % nbuckets);
1315*2c5b6df1Sdv142724 
1316*2c5b6df1Sdv142724 	for (count = nbuckets; count > 0; count--) {
1317*2c5b6df1Sdv142724 		qptr = list_head(&qp->msg_wait_snd_ngt[check_index]);
1318*2c5b6df1Sdv142724 		while (qptr) {
1319*2c5b6df1Sdv142724 			/*
1320*2c5b6df1Sdv142724 			 * The lowest hash bucket may actually contain
1321*2c5b6df1Sdv142724 			 * message types that are not valid for this
1322*2c5b6df1Sdv142724 			 * request.  This can happen due to the fact that
1323*2c5b6df1Sdv142724 			 * the message buckets actually contain a consecutive
1324*2c5b6df1Sdv142724 			 * range of types.
1325*2c5b6df1Sdv142724 			 */
1326*2c5b6df1Sdv142724 			if (-qptr->msgw_type >= type) {
1327*2c5b6df1Sdv142724 				return (qptr);
1328*2c5b6df1Sdv142724 			}
1329*2c5b6df1Sdv142724 			qptr = list_next(&qp->msg_wait_snd_ngt[msg_hash], qptr);
1330*2c5b6df1Sdv142724 		}
1331*2c5b6df1Sdv142724 
1332*2c5b6df1Sdv142724 		if (++check_index > MSG_MAX_QNUM) {
1333*2c5b6df1Sdv142724 			check_index = neg_index;
1334*2c5b6df1Sdv142724 		}
1335*2c5b6df1Sdv142724 	}
1336*2c5b6df1Sdv142724 	return (NULL);
1337*2c5b6df1Sdv142724 }
1338*2c5b6df1Sdv142724 
1339*2c5b6df1Sdv142724 static int
1340*2c5b6df1Sdv142724 msg_rcvq_sleep(list_t *queue, msgq_wakeup_t *entry, kmutex_t **lock,
1341*2c5b6df1Sdv142724     kmsqid_t *qp)
1342*2c5b6df1Sdv142724 {
1343*2c5b6df1Sdv142724 	int		cvres;
1344*2c5b6df1Sdv142724 
1345*2c5b6df1Sdv142724 	cv_init(&entry->msgw_wake_cv, NULL, 0, NULL);
1346*2c5b6df1Sdv142724 
1347*2c5b6df1Sdv142724 	list_insert_tail(queue, entry);
1348*2c5b6df1Sdv142724 
1349*2c5b6df1Sdv142724 	qp->msg_rcv_cnt++;
1350*2c5b6df1Sdv142724 	cvres = cv_wait_sig(&entry->msgw_wake_cv, *lock);
1351*2c5b6df1Sdv142724 	*lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, *lock);
1352*2c5b6df1Sdv142724 	qp->msg_rcv_cnt--;
1353*2c5b6df1Sdv142724 	/*
1354*2c5b6df1Sdv142724 	 * We have woken up, so remove ourselves from the waiter list.
1355*2c5b6df1Sdv142724 	 */
1356*2c5b6df1Sdv142724 	if (!IPC_FREE(&qp->msg_perm)) {
1357*2c5b6df1Sdv142724 		list_remove(queue, entry);
1358*2c5b6df1Sdv142724 	}
1359*2c5b6df1Sdv142724 
1360*2c5b6df1Sdv142724 	return (cvres);
1361*2c5b6df1Sdv142724 }
1362*2c5b6df1Sdv142724 
1363*2c5b6df1Sdv142724 static void
1364*2c5b6df1Sdv142724 msg_rcvq_wakeup_all(list_t *q_ptr)
1365*2c5b6df1Sdv142724 {
1366*2c5b6df1Sdv142724 	msgq_wakeup_t	*q_walk;
1367*2c5b6df1Sdv142724 
1368*2c5b6df1Sdv142724 	q_walk = (msgq_wakeup_t *)list_head(q_ptr);
1369*2c5b6df1Sdv142724 	while (q_walk) {
1370*2c5b6df1Sdv142724 		/*
1371*2c5b6df1Sdv142724 		 * Walk the entire list, wake every process up.
1372*2c5b6df1Sdv142724 		 */
1373*2c5b6df1Sdv142724 		cv_signal(&q_walk->msgw_wake_cv);
1374*2c5b6df1Sdv142724 		q_walk = list_next(q_ptr, q_walk);
1375*2c5b6df1Sdv142724 	}
1376*2c5b6df1Sdv142724 }
1377*2c5b6df1Sdv142724 
13787c478bd9Sstevel@tonic-gate /*
13797c478bd9Sstevel@tonic-gate  * msgsys - System entry point for msgctl, msgget, msgrcv, and msgsnd
13807c478bd9Sstevel@tonic-gate  * system calls.
13817c478bd9Sstevel@tonic-gate  */
13827c478bd9Sstevel@tonic-gate static ssize_t
13837c478bd9Sstevel@tonic-gate msgsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3,
13847c478bd9Sstevel@tonic-gate 	uintptr_t a4, uintptr_t a5)
13857c478bd9Sstevel@tonic-gate {
13867c478bd9Sstevel@tonic-gate 	ssize_t error;
13877c478bd9Sstevel@tonic-gate 
13887c478bd9Sstevel@tonic-gate 	switch (opcode) {
13897c478bd9Sstevel@tonic-gate 	case MSGGET:
13907c478bd9Sstevel@tonic-gate 		error = msgget((key_t)a1, (int)a2);
13917c478bd9Sstevel@tonic-gate 		break;
13927c478bd9Sstevel@tonic-gate 	case MSGCTL:
13937c478bd9Sstevel@tonic-gate 		error = msgctl((int)a1, (int)a2, (void *)a3);
13947c478bd9Sstevel@tonic-gate 		break;
13957c478bd9Sstevel@tonic-gate 	case MSGRCV:
13967c478bd9Sstevel@tonic-gate 		error = msgrcv((int)a1, (struct ipcmsgbuf *)a2,
13977c478bd9Sstevel@tonic-gate 		    (size_t)a3, (long)a4, (int)a5);
13987c478bd9Sstevel@tonic-gate 		break;
13997c478bd9Sstevel@tonic-gate 	case MSGSND:
14007c478bd9Sstevel@tonic-gate 		error = msgsnd((int)a1, (struct ipcmsgbuf *)a2,
14017c478bd9Sstevel@tonic-gate 		    (size_t)a3, (int)a4);
14027c478bd9Sstevel@tonic-gate 		break;
14037c478bd9Sstevel@tonic-gate 	case MSGIDS:
14047c478bd9Sstevel@tonic-gate 		error = msgids((int *)a1, (uint_t)a2, (uint_t *)a3);
14057c478bd9Sstevel@tonic-gate 		break;
14067c478bd9Sstevel@tonic-gate 	case MSGSNAP:
14077c478bd9Sstevel@tonic-gate 		error = msgsnap((int)a1, (caddr_t)a2, (size_t)a3, (long)a4);
14087c478bd9Sstevel@tonic-gate 		break;
14097c478bd9Sstevel@tonic-gate 	default:
14107c478bd9Sstevel@tonic-gate 		error = set_errno(EINVAL);
14117c478bd9Sstevel@tonic-gate 		break;
14127c478bd9Sstevel@tonic-gate 	}
14137c478bd9Sstevel@tonic-gate 
14147c478bd9Sstevel@tonic-gate 	return (error);
14157c478bd9Sstevel@tonic-gate }
14167c478bd9Sstevel@tonic-gate 
#ifdef	_SYSCALL32_IMPL
/*
 * msgsys32 - System entry point for the message queue system calls when
 * made by 32-bit callers on an LP64 kernel.
 *
 * Dispatches on opcode to msgget, msgctl, msgrcv, msgsnd, msgids or
 * msgsnap.  Pointer arguments are widened through uintptr_t.  The message
 * type arguments (a4 of MSGRCV and MSGSNAP) and the MSGSND size (a3) pass
 * through int32_t first, so they are sign-extended.  An unrecognized
 * opcode yields set_errno(EINVAL).
 */
static ssize32_t
msgsys32(int opcode, uint32_t a1, uint32_t a2, uint32_t a3,
	uint32_t a4, uint32_t a5)
{
	ssize_t rval;

	switch (opcode) {
	case MSGGET:
		rval = msgget((key_t)a1, (int)a2);
		break;
	case MSGCTL:
		rval = msgctl((int)a1, (int)a2, (void *)(uintptr_t)a3);
		break;
	case MSGRCV:
		rval = msgrcv((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2,
		    (size_t)a3, (long)(int32_t)a4, (int)a5);
		break;
	case MSGSND:
		rval = msgsnd((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2,
		    (size_t)(int32_t)a3, (int)a4);
		break;
	case MSGIDS:
		rval = msgids((int *)(uintptr_t)a1, (uint_t)a2,
		    (uint_t *)(uintptr_t)a3);
		break;
	case MSGSNAP:
		rval = msgsnap((int)a1, (caddr_t)(uintptr_t)a2, (size_t)a3,
		    (long)(int32_t)a4);
		break;
	default:
		rval = set_errno(EINVAL);
		break;
	}

	return (rval);
}
#endif	/* _SYSCALL32_IMPL */
1459