17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5b2eb1770Sudpa * Common Development and Distribution License (the "License"). 6b2eb1770Sudpa * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22*005d3febSMarek Pospisil * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 277c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 287c478bd9Sstevel@tonic-gate 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate /* 317c478bd9Sstevel@tonic-gate * Inter-Process Communication Message Facility. 
327c478bd9Sstevel@tonic-gate * 337c478bd9Sstevel@tonic-gate * See os/ipc.c for a description of common IPC functionality. 347c478bd9Sstevel@tonic-gate * 357c478bd9Sstevel@tonic-gate * Resource controls 367c478bd9Sstevel@tonic-gate * ----------------- 377c478bd9Sstevel@tonic-gate * 38824c205fSml93401 * Control: zone.max-msg-ids (rc_zone_msgmni) 39824c205fSml93401 * Description: Maximum number of message queue ids allowed a zone. 40824c205fSml93401 * 41824c205fSml93401 * When msgget() is used to allocate a message queue, one id is 42824c205fSml93401 * allocated. If the id allocation doesn't succeed, msgget() fails 43824c205fSml93401 * and errno is set to ENOSPC. Upon successful msgctl(, IPC_RMID) 44824c205fSml93401 * the id is deallocated. 45824c205fSml93401 * 467c478bd9Sstevel@tonic-gate * Control: project.max-msg-ids (rc_project_msgmni) 477c478bd9Sstevel@tonic-gate * Description: Maximum number of message queue ids allowed a project. 487c478bd9Sstevel@tonic-gate * 497c478bd9Sstevel@tonic-gate * When msgget() is used to allocate a message queue, one id is 507c478bd9Sstevel@tonic-gate * allocated. If the id allocation doesn't succeed, msgget() fails 517c478bd9Sstevel@tonic-gate * and errno is set to ENOSPC. Upon successful msgctl(, IPC_RMID) 527c478bd9Sstevel@tonic-gate * the id is deallocated. 537c478bd9Sstevel@tonic-gate * 547c478bd9Sstevel@tonic-gate * Control: process.max-msg-qbytes (rc_process_msgmnb) 557c478bd9Sstevel@tonic-gate * Description: Maximum number of bytes of messages on a message queue. 567c478bd9Sstevel@tonic-gate * 577c478bd9Sstevel@tonic-gate * When msgget() successfully allocates a message queue, the minimum 587c478bd9Sstevel@tonic-gate * enforced value of this limit is used to initialize msg_qbytes. 597c478bd9Sstevel@tonic-gate * 607c478bd9Sstevel@tonic-gate * Control: process.max-msg-messages (rc_process_msgtql) 617c478bd9Sstevel@tonic-gate * Description: Maximum number of messages on a message queue. 
627c478bd9Sstevel@tonic-gate * 637c478bd9Sstevel@tonic-gate * When msgget() successfully allocates a message queue, the minimum 647c478bd9Sstevel@tonic-gate * enforced value of this limit is used to initialize a per-queue 657c478bd9Sstevel@tonic-gate * limit on the number of messages. 667c478bd9Sstevel@tonic-gate */ 677c478bd9Sstevel@tonic-gate 687c478bd9Sstevel@tonic-gate #include <sys/types.h> 697c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 707c478bd9Sstevel@tonic-gate #include <sys/param.h> 717c478bd9Sstevel@tonic-gate #include <sys/cred.h> 727c478bd9Sstevel@tonic-gate #include <sys/user.h> 737c478bd9Sstevel@tonic-gate #include <sys/proc.h> 747c478bd9Sstevel@tonic-gate #include <sys/time.h> 757c478bd9Sstevel@tonic-gate #include <sys/ipc.h> 767c478bd9Sstevel@tonic-gate #include <sys/ipc_impl.h> 777c478bd9Sstevel@tonic-gate #include <sys/msg.h> 787c478bd9Sstevel@tonic-gate #include <sys/msg_impl.h> 797c478bd9Sstevel@tonic-gate #include <sys/list.h> 807c478bd9Sstevel@tonic-gate #include <sys/systm.h> 817c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 827c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 837c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 847c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 857c478bd9Sstevel@tonic-gate #include <sys/errno.h> 867c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 877c478bd9Sstevel@tonic-gate #include <sys/debug.h> 887c478bd9Sstevel@tonic-gate #include <sys/project.h> 897c478bd9Sstevel@tonic-gate #include <sys/modctl.h> 907c478bd9Sstevel@tonic-gate #include <sys/syscall.h> 917c478bd9Sstevel@tonic-gate #include <sys/policy.h> 927c478bd9Sstevel@tonic-gate #include <sys/zone.h> 937c478bd9Sstevel@tonic-gate 947c478bd9Sstevel@tonic-gate #include <c2/audit.h> 957c478bd9Sstevel@tonic-gate 967c478bd9Sstevel@tonic-gate /* 977c478bd9Sstevel@tonic-gate * The following tunables are obsolete. 
Though for compatibility we 987c478bd9Sstevel@tonic-gate * still read and interpret msginfo_msgmnb, msginfo_msgmni, and 997c478bd9Sstevel@tonic-gate * msginfo_msgtql (see os/project.c and os/rctl_proc.c), the preferred 1007c478bd9Sstevel@tonic-gate * mechanism for administrating the IPC Message facility is through the 1017c478bd9Sstevel@tonic-gate * resource controls described at the top of this file. 1027c478bd9Sstevel@tonic-gate */ 1037c478bd9Sstevel@tonic-gate size_t msginfo_msgmax = 2048; /* (obsolete) */ 1047c478bd9Sstevel@tonic-gate size_t msginfo_msgmnb = 4096; /* (obsolete) */ 1057c478bd9Sstevel@tonic-gate int msginfo_msgmni = 50; /* (obsolete) */ 1067c478bd9Sstevel@tonic-gate int msginfo_msgtql = 40; /* (obsolete) */ 1077c478bd9Sstevel@tonic-gate int msginfo_msgssz = 8; /* (obsolete) */ 1087c478bd9Sstevel@tonic-gate int msginfo_msgmap = 0; /* (obsolete) */ 1097c478bd9Sstevel@tonic-gate ushort_t msginfo_msgseg = 1024; /* (obsolete) */ 1107c478bd9Sstevel@tonic-gate 111824c205fSml93401 extern rctl_hndl_t rc_zone_msgmni; 1127c478bd9Sstevel@tonic-gate extern rctl_hndl_t rc_project_msgmni; 1137c478bd9Sstevel@tonic-gate extern rctl_hndl_t rc_process_msgmnb; 1147c478bd9Sstevel@tonic-gate extern rctl_hndl_t rc_process_msgtql; 1157c478bd9Sstevel@tonic-gate static ipc_service_t *msq_svc; 1167c478bd9Sstevel@tonic-gate static zone_key_t msg_zone_key; 1177c478bd9Sstevel@tonic-gate 1187c478bd9Sstevel@tonic-gate static void msg_dtor(kipc_perm_t *); 1197c478bd9Sstevel@tonic-gate static void msg_rmid(kipc_perm_t *); 1207c478bd9Sstevel@tonic-gate static void msg_remove_zone(zoneid_t, void *); 1217c478bd9Sstevel@tonic-gate 1227c478bd9Sstevel@tonic-gate /* 1237c478bd9Sstevel@tonic-gate * Module linkage information for the kernel. 
1247c478bd9Sstevel@tonic-gate */ 1257c478bd9Sstevel@tonic-gate static ssize_t msgsys(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2, 1267c478bd9Sstevel@tonic-gate uintptr_t a4, uintptr_t a5); 1277c478bd9Sstevel@tonic-gate 1287c478bd9Sstevel@tonic-gate static struct sysent ipcmsg_sysent = { 1297c478bd9Sstevel@tonic-gate 6, 1307c478bd9Sstevel@tonic-gate #ifdef _LP64 1317c478bd9Sstevel@tonic-gate SE_ARGC | SE_NOUNLOAD | SE_64RVAL, 1327c478bd9Sstevel@tonic-gate #else 1337c478bd9Sstevel@tonic-gate SE_ARGC | SE_NOUNLOAD | SE_32RVAL1, 1347c478bd9Sstevel@tonic-gate #endif 1357c478bd9Sstevel@tonic-gate (int (*)())msgsys 1367c478bd9Sstevel@tonic-gate }; 1377c478bd9Sstevel@tonic-gate 1387c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 1397c478bd9Sstevel@tonic-gate static ssize32_t msgsys32(int opcode, uint32_t a0, uint32_t a1, uint32_t a2, 1407c478bd9Sstevel@tonic-gate uint32_t a4, uint32_t a5); 1417c478bd9Sstevel@tonic-gate 1427c478bd9Sstevel@tonic-gate static struct sysent ipcmsg_sysent32 = { 1437c478bd9Sstevel@tonic-gate 6, 1447c478bd9Sstevel@tonic-gate SE_ARGC | SE_NOUNLOAD | SE_32RVAL1, 1457c478bd9Sstevel@tonic-gate (int (*)())msgsys32 1467c478bd9Sstevel@tonic-gate }; 1477c478bd9Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 1487c478bd9Sstevel@tonic-gate 1497c478bd9Sstevel@tonic-gate static struct modlsys modlsys = { 1507c478bd9Sstevel@tonic-gate &mod_syscallops, "System V message facility", &ipcmsg_sysent 1517c478bd9Sstevel@tonic-gate }; 1527c478bd9Sstevel@tonic-gate 1537c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 1547c478bd9Sstevel@tonic-gate static struct modlsys modlsys32 = { 1557c478bd9Sstevel@tonic-gate &mod_syscallops32, "32-bit System V message facility", &ipcmsg_sysent32 1567c478bd9Sstevel@tonic-gate }; 1577c478bd9Sstevel@tonic-gate #endif 1587c478bd9Sstevel@tonic-gate 1592c5b6df1Sdv142724 /* 1602c5b6df1Sdv142724 * Big Theory statement for message queue correctness 1612c5b6df1Sdv142724 * 1622c5b6df1Sdv142724 * The msgrcv and msgsnd functions no 
 * longer use cv_broadcast to wake up
 * receivers who are waiting for an event.  Using the cv_broadcast method
 * resulted in negative scaling when the number of waiting receivers is large
 * (the thundering herd problem).  Instead, the receivers waiting to receive a
 * message are now linked in a queue-like fashion and awakened one at a time in
 * a controlled manner.
 *
 * Receivers can block on two different classes of waiting list:
 *    1) "sendwait" list, which is the more complex list of the two.  The
 *	 receiver will be awakened by a sender posting a new message.  There
 *	 are two types of "sendwait" list used:
 *		a) msg_wait_snd: handles all receivers who are looking for
 *		   a message type >= 0, but were unable to locate a match.
 *
 *		   slot 0: reserved for receivers that have designated they
 *			   will take any message type.
 *		   rest:   consist of receivers requesting a specific type
 *			   but the type was not present.  The entries are
 *			   hashed into a bucket in an attempt to keep
 *			   any list search relatively short.
 *		b) msg_wait_snd_ngt: handles all receivers that have designated
 *		   a negative message type.  Unlike msg_wait_snd, the hash
 *		   bucket serves a range of negative message types (-1 to -5,
 *		   -6 to -10 and so forth), where the last bucket is reserved
 *		   for all the negative message types that hash outside of
 *		   MSG_MAX_QNUM - 1.  This is done this way to simplify the
 *		   operation of locating a negative message type.
 *
 *    2) "copyout" list, where the receiver is awakened by another
 *	 receiver after a message is copied out.  This is a linked list
 *	 of waiters that are awakened one at a time.  Although the solution is
 *	 not optimal, the complexity that would be added in for waking
 *	 up the right entry far exceeds any potential pay back (too many
 *	 correctness and corner case issues).
 *
 * The lists are doubly linked.  In the case of the "sendwait"
 * list, this allows the thread to remove itself from the list without having
 * to traverse the list.  In the case of the "copyout" list it simply allows
 * us to use common functions with the "sendwait" list.
 *
 * To make sure receivers are not hung out to dry, we must guarantee:
 *    1. If any queued message matches any receiver, then at least one
 *	 matching receiver must be processing the request.
 *    2. Blocking on the copyout queue is only temporary while messages
 *	 are being copied out.  The process is guaranteed to wakeup
 *	 when it gets to front of the queue (copyout is a FIFO).
 *
 * Rules for blocking and waking up:
 *    1. A receiver entering msgrcv must examine all messages for a match
 *	 before blocking on a sendwait queue.
 *    2. If the receiver blocks because the message it chose is already
 *	 being copied out, then when it wakes up it needs to start
 *	 checking the messages from the beginning.
 *    3) Whenever a process returns from msgrcv for any reason, if it
 *	 had attempted to copy a message or blocked waiting for a copy
 *	 to complete it needs to wakeup the next receiver blocked on
 *	 a copy out.
 *    4) When a message is sent, the sender selects a process waiting
 *	 for that type of message.  This selection process rotates between
 *	 receiver types of 0, negative and positive to prevent starvation of
 *	 any one particular receiver type.
 *    5) The following are the scenarios for processes that are awakened
 *	 by a msgsnd:
 *		a) The process finds the message and is able to copy
 *		   it out.  Once complete, the process returns.
 *		b) The message that was sent that triggered the wakeup is no
 *		   longer available (another process found the message first).
 *		   We issue a wakeup on copy queue and then go back to
 *		   sleep waiting for another matching message to be sent.
 *		c) The message that was supposed to be processed was
 *		   already serviced by another process.  However a different
 *		   message is present which we can service.  The message
 *		   is copied and the process returns.
 *		d) The message is found, but some sort of error occurs that
 *		   prevents the message from being copied.  The receiver
 *		   wakes up the next sender that can service this message
 *		   type and returns an error to the caller.
 *		e) The message is found, but it is marked as being copied
 *		   out.
 *		   The receiver then goes to sleep on the copyout
 *		   queue where it will be awakened again sometime in the future.
 *
 *
 *    6) Whenever a message is found that matches the message type designated,
 *	 but is being copied out we have to block on the copyout queue.
 *	 After the process doing the copying finishes the copy out, it must
 *	 wakeup (either directly or indirectly) all receivers who blocked on
 *	 its copyout, so they are guaranteed a chance to examine the remaining
 *	 messages.  This is implemented via a chain of wakeups: Y wakes X, who
 *	 wakes Z, and so on.  The chain cannot be broken.  This leads to the
 *	 following cases:
 *		a) A receiver is finished copying the message (or encountered
 *		   an error), the first entry on the copyout queue is woken
 *		   up.
 *		b) When the receiver is woken up, it attempts to locate
 *		   a message type match.
 *		c) If a message type is found and
 *			-- MSG_RCVCOPY flag is not set, the message is
 *			   marked for copying out.  Regardless of the copyout
 *			   success the next entry on the copyout queue is
 *			   awakened and the operation is completed.
 *			-- MSG_RCVCOPY is set, we simply go back to sleep again
 *			   on the copyout queue.
 *		d) If the message type is not found then we wakeup the next
 *		   process on the copyout queue.
 *    7) If a msgsnd is unable to complete for any of the following reasons
 *	   a) the msgq has no space for the message
 *	   b) the maximum number of messages allowed has been reached
 *	 then one of two things happens:
 *	   1) If the passed in msg_flag has IPC_NOWAIT set, then
 *	      an error is returned.
 *	   2) The IPC_NOWAIT bit is not set in msg_flag, then the
 *	      thread is placed to sleep until the request can be
 *	      serviced.
 *    8) When waking a thread waiting to send a message, a check is done to
 *	 verify that the operation being asked for by the thread will complete.
 *	 This decision making process is done in a loop where the oldest request
 *	 is checked first.  The search will continue until there is no more
 *	 room on the msgq or we have checked all the waiters.
2802c5b6df1Sdv142724 */ 2812c5b6df1Sdv142724 282e5994f96Sdv142724 static uint_t msg_type_hash(long); 2832c5b6df1Sdv142724 static int msgq_check_err(kmsqid_t *qp, int cvres); 2842c5b6df1Sdv142724 static int msg_rcvq_sleep(list_t *, msgq_wakeup_t *, kmutex_t **, 2852c5b6df1Sdv142724 kmsqid_t *); 2862c5b6df1Sdv142724 static int msg_copyout(kmsqid_t *, long, kmutex_t **, size_t *, size_t, 2872c5b6df1Sdv142724 struct msg *, struct ipcmsgbuf *, int); 2882c5b6df1Sdv142724 static void msg_rcvq_wakeup_all(list_t *); 289eb9fe4caSDavid Valin static void msg_wakeup_senders(kmsqid_t *); 2902c5b6df1Sdv142724 static void msg_wakeup_rdr(kmsqid_t *, msg_select_t **, long); 2912c5b6df1Sdv142724 static msgq_wakeup_t *msg_fnd_any_snd(kmsqid_t *, int, long); 2922c5b6df1Sdv142724 static msgq_wakeup_t *msg_fnd_any_rdr(kmsqid_t *, int, long); 2932c5b6df1Sdv142724 static msgq_wakeup_t *msg_fnd_neg_snd(kmsqid_t *, int, long); 2942c5b6df1Sdv142724 static msgq_wakeup_t *msg_fnd_spc_snd(kmsqid_t *, int, long); 2952c5b6df1Sdv142724 static struct msg *msgrcv_lookup(kmsqid_t *, long); 2962c5b6df1Sdv142724 2972c5b6df1Sdv142724 msg_select_t msg_fnd_sndr[] = { 2982c5b6df1Sdv142724 { msg_fnd_any_snd, &msg_fnd_sndr[1] }, 2992c5b6df1Sdv142724 { msg_fnd_spc_snd, &msg_fnd_sndr[2] }, 3002c5b6df1Sdv142724 { msg_fnd_neg_snd, &msg_fnd_sndr[0] } 3012c5b6df1Sdv142724 }; 3022c5b6df1Sdv142724 3032c5b6df1Sdv142724 msg_select_t msg_fnd_rdr[1] = { 3042c5b6df1Sdv142724 { msg_fnd_any_rdr, &msg_fnd_rdr[0] }, 3052c5b6df1Sdv142724 }; 3062c5b6df1Sdv142724 3077c478bd9Sstevel@tonic-gate static struct modlinkage modlinkage = { 3087c478bd9Sstevel@tonic-gate MODREV_1, 3097c478bd9Sstevel@tonic-gate &modlsys, 3107c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 3117c478bd9Sstevel@tonic-gate &modlsys32, 3127c478bd9Sstevel@tonic-gate #endif 3137c478bd9Sstevel@tonic-gate NULL 3147c478bd9Sstevel@tonic-gate }; 3157c478bd9Sstevel@tonic-gate 316eb9fe4caSDavid Valin #define MSG_SMALL_INIT (size_t)-1 3177c478bd9Sstevel@tonic-gate int 
3187c478bd9Sstevel@tonic-gate _init(void) 3197c478bd9Sstevel@tonic-gate { 3207c478bd9Sstevel@tonic-gate int result; 3217c478bd9Sstevel@tonic-gate 322824c205fSml93401 msq_svc = ipcs_create("msqids", rc_project_msgmni, rc_zone_msgmni, 323824c205fSml93401 sizeof (kmsqid_t), msg_dtor, msg_rmid, AT_IPC_MSG, 324824c205fSml93401 offsetof(ipc_rqty_t, ipcq_msgmni)); 3257c478bd9Sstevel@tonic-gate zone_key_create(&msg_zone_key, NULL, msg_remove_zone, NULL); 3267c478bd9Sstevel@tonic-gate 3277c478bd9Sstevel@tonic-gate if ((result = mod_install(&modlinkage)) == 0) 3287c478bd9Sstevel@tonic-gate return (0); 3297c478bd9Sstevel@tonic-gate 3307c478bd9Sstevel@tonic-gate (void) zone_key_delete(msg_zone_key); 3317c478bd9Sstevel@tonic-gate ipcs_destroy(msq_svc); 3327c478bd9Sstevel@tonic-gate 3337c478bd9Sstevel@tonic-gate return (result); 3347c478bd9Sstevel@tonic-gate } 3357c478bd9Sstevel@tonic-gate 3367c478bd9Sstevel@tonic-gate int 3377c478bd9Sstevel@tonic-gate _fini(void) 3387c478bd9Sstevel@tonic-gate { 3397c478bd9Sstevel@tonic-gate return (EBUSY); 3407c478bd9Sstevel@tonic-gate } 3417c478bd9Sstevel@tonic-gate 3427c478bd9Sstevel@tonic-gate int 3437c478bd9Sstevel@tonic-gate _info(struct modinfo *modinfop) 3447c478bd9Sstevel@tonic-gate { 3457c478bd9Sstevel@tonic-gate return (mod_info(&modlinkage, modinfop)); 3467c478bd9Sstevel@tonic-gate } 3477c478bd9Sstevel@tonic-gate 3487c478bd9Sstevel@tonic-gate static void 3497c478bd9Sstevel@tonic-gate msg_dtor(kipc_perm_t *perm) 3507c478bd9Sstevel@tonic-gate { 3517c478bd9Sstevel@tonic-gate kmsqid_t *qp = (kmsqid_t *)perm; 352b2eb1770Sudpa int ii; 3537c478bd9Sstevel@tonic-gate 3542c5b6df1Sdv142724 for (ii = 0; ii <= MSG_MAX_QNUM; ii++) { 3552c5b6df1Sdv142724 ASSERT(list_is_empty(&qp->msg_wait_snd[ii])); 3562c5b6df1Sdv142724 ASSERT(list_is_empty(&qp->msg_wait_snd_ngt[ii])); 3572c5b6df1Sdv142724 list_destroy(&qp->msg_wait_snd[ii]); 3582c5b6df1Sdv142724 list_destroy(&qp->msg_wait_snd_ngt[ii]); 3592c5b6df1Sdv142724 } 3602c5b6df1Sdv142724 
ASSERT(list_is_empty(&qp->msg_cpy_block)); 361eb9fe4caSDavid Valin ASSERT(list_is_empty(&qp->msg_wait_rcv)); 3622c5b6df1Sdv142724 list_destroy(&qp->msg_cpy_block); 3637c478bd9Sstevel@tonic-gate ASSERT(qp->msg_snd_cnt == 0); 3647c478bd9Sstevel@tonic-gate ASSERT(qp->msg_cbytes == 0); 3657c478bd9Sstevel@tonic-gate list_destroy(&qp->msg_list); 366eb9fe4caSDavid Valin list_destroy(&qp->msg_wait_rcv); 3677c478bd9Sstevel@tonic-gate } 3687c478bd9Sstevel@tonic-gate 3697c478bd9Sstevel@tonic-gate 3707c478bd9Sstevel@tonic-gate #define msg_hold(mp) (mp)->msg_copycnt++ 3717c478bd9Sstevel@tonic-gate 3727c478bd9Sstevel@tonic-gate /* 3737c478bd9Sstevel@tonic-gate * msg_rele - decrement the reference count on the message. When count 3747c478bd9Sstevel@tonic-gate * reaches zero, free message header and contents. 3757c478bd9Sstevel@tonic-gate */ 3767c478bd9Sstevel@tonic-gate static void 3777c478bd9Sstevel@tonic-gate msg_rele(struct msg *mp) 3787c478bd9Sstevel@tonic-gate { 3797c478bd9Sstevel@tonic-gate ASSERT(mp->msg_copycnt > 0); 3807c478bd9Sstevel@tonic-gate if (mp->msg_copycnt-- == 1) { 3817c478bd9Sstevel@tonic-gate if (mp->msg_addr) 3827c478bd9Sstevel@tonic-gate kmem_free(mp->msg_addr, mp->msg_size); 3837c478bd9Sstevel@tonic-gate kmem_free(mp, sizeof (struct msg)); 3847c478bd9Sstevel@tonic-gate } 3857c478bd9Sstevel@tonic-gate } 3867c478bd9Sstevel@tonic-gate 3877c478bd9Sstevel@tonic-gate /* 3887c478bd9Sstevel@tonic-gate * msgunlink - Unlink msg from queue, decrement byte count and wake up anyone 3897c478bd9Sstevel@tonic-gate * waiting for free bytes on queue. 3907c478bd9Sstevel@tonic-gate * 3917c478bd9Sstevel@tonic-gate * Called with queue locked. 
3927c478bd9Sstevel@tonic-gate */ 3937c478bd9Sstevel@tonic-gate static void 3947c478bd9Sstevel@tonic-gate msgunlink(kmsqid_t *qp, struct msg *mp) 3957c478bd9Sstevel@tonic-gate { 3967c478bd9Sstevel@tonic-gate list_remove(&qp->msg_list, mp); 3977c478bd9Sstevel@tonic-gate qp->msg_qnum--; 3987c478bd9Sstevel@tonic-gate qp->msg_cbytes -= mp->msg_size; 3997c478bd9Sstevel@tonic-gate msg_rele(mp); 4007c478bd9Sstevel@tonic-gate 4017c478bd9Sstevel@tonic-gate /* Wake up waiting writers */ 402eb9fe4caSDavid Valin msg_wakeup_senders(qp); 4037c478bd9Sstevel@tonic-gate } 4047c478bd9Sstevel@tonic-gate 4057c478bd9Sstevel@tonic-gate static void 4067c478bd9Sstevel@tonic-gate msg_rmid(kipc_perm_t *perm) 4077c478bd9Sstevel@tonic-gate { 4087c478bd9Sstevel@tonic-gate kmsqid_t *qp = (kmsqid_t *)perm; 4097c478bd9Sstevel@tonic-gate struct msg *mp; 410b2eb1770Sudpa int ii; 4117c478bd9Sstevel@tonic-gate 4127c478bd9Sstevel@tonic-gate 4137c478bd9Sstevel@tonic-gate while ((mp = list_head(&qp->msg_list)) != NULL) 4147c478bd9Sstevel@tonic-gate msgunlink(qp, mp); 4157c478bd9Sstevel@tonic-gate ASSERT(qp->msg_cbytes == 0); 4167c478bd9Sstevel@tonic-gate 4172c5b6df1Sdv142724 /* 4182c5b6df1Sdv142724 * Wake up everyone who is in a wait state of some sort 4192c5b6df1Sdv142724 * for this message queue. 4202c5b6df1Sdv142724 */ 4212c5b6df1Sdv142724 for (ii = 0; ii <= MSG_MAX_QNUM; ii++) { 4222c5b6df1Sdv142724 msg_rcvq_wakeup_all(&qp->msg_wait_snd[ii]); 4232c5b6df1Sdv142724 msg_rcvq_wakeup_all(&qp->msg_wait_snd_ngt[ii]); 424b2eb1770Sudpa } 4252c5b6df1Sdv142724 msg_rcvq_wakeup_all(&qp->msg_cpy_block); 426eb9fe4caSDavid Valin msg_rcvq_wakeup_all(&qp->msg_wait_rcv); 4277c478bd9Sstevel@tonic-gate } 4287c478bd9Sstevel@tonic-gate 4297c478bd9Sstevel@tonic-gate /* 4307c478bd9Sstevel@tonic-gate * msgctl system call. 4317c478bd9Sstevel@tonic-gate * 4327c478bd9Sstevel@tonic-gate * gets q lock (via ipc_lookup), releases before return. 
4337c478bd9Sstevel@tonic-gate * may call users of msg_lock 4347c478bd9Sstevel@tonic-gate */ 4357c478bd9Sstevel@tonic-gate static int 4367c478bd9Sstevel@tonic-gate msgctl(int msgid, int cmd, void *arg) 4377c478bd9Sstevel@tonic-gate { 4387c478bd9Sstevel@tonic-gate STRUCT_DECL(msqid_ds, ds); /* SVR4 queue work area */ 4397c478bd9Sstevel@tonic-gate kmsqid_t *qp; /* ptr to associated q */ 4402c5b6df1Sdv142724 int error; 4417c478bd9Sstevel@tonic-gate struct cred *cr; 4427c478bd9Sstevel@tonic-gate model_t mdl = get_udatamodel(); 4437c478bd9Sstevel@tonic-gate struct msqid_ds64 ds64; 4447c478bd9Sstevel@tonic-gate kmutex_t *lock; 4457c478bd9Sstevel@tonic-gate proc_t *pp = curproc; 4467c478bd9Sstevel@tonic-gate 4477c478bd9Sstevel@tonic-gate STRUCT_INIT(ds, mdl); 4487c478bd9Sstevel@tonic-gate cr = CRED(); 4497c478bd9Sstevel@tonic-gate 4507c478bd9Sstevel@tonic-gate /* 4517c478bd9Sstevel@tonic-gate * Perform pre- or non-lookup actions (e.g. copyins, RMID). 4527c478bd9Sstevel@tonic-gate */ 4537c478bd9Sstevel@tonic-gate switch (cmd) { 4547c478bd9Sstevel@tonic-gate case IPC_SET: 4557c478bd9Sstevel@tonic-gate if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds))) 4567c478bd9Sstevel@tonic-gate return (set_errno(EFAULT)); 4577c478bd9Sstevel@tonic-gate break; 4587c478bd9Sstevel@tonic-gate 4597c478bd9Sstevel@tonic-gate case IPC_SET64: 4607c478bd9Sstevel@tonic-gate if (copyin(arg, &ds64, sizeof (struct msqid_ds64))) 4617c478bd9Sstevel@tonic-gate return (set_errno(EFAULT)); 4627c478bd9Sstevel@tonic-gate break; 4637c478bd9Sstevel@tonic-gate 4647c478bd9Sstevel@tonic-gate case IPC_RMID: 4657c478bd9Sstevel@tonic-gate if (error = ipc_rmid(msq_svc, msgid, cr)) 4667c478bd9Sstevel@tonic-gate return (set_errno(error)); 4677c478bd9Sstevel@tonic-gate return (0); 4687c478bd9Sstevel@tonic-gate } 4697c478bd9Sstevel@tonic-gate 4707c478bd9Sstevel@tonic-gate /* 4717c478bd9Sstevel@tonic-gate * get msqid_ds for this msgid 4727c478bd9Sstevel@tonic-gate */ 4737c478bd9Sstevel@tonic-gate if ((lock = 
ipc_lookup(msq_svc, msgid, (kipc_perm_t **)&qp)) == NULL) 4747c478bd9Sstevel@tonic-gate return (set_errno(EINVAL)); 4757c478bd9Sstevel@tonic-gate 4767c478bd9Sstevel@tonic-gate switch (cmd) { 4777c478bd9Sstevel@tonic-gate case IPC_SET: 4787c478bd9Sstevel@tonic-gate if (STRUCT_FGET(ds, msg_qbytes) > qp->msg_qbytes && 4797c478bd9Sstevel@tonic-gate secpolicy_ipc_config(cr) != 0) { 4807c478bd9Sstevel@tonic-gate mutex_exit(lock); 4817c478bd9Sstevel@tonic-gate return (set_errno(EPERM)); 4827c478bd9Sstevel@tonic-gate } 4837c478bd9Sstevel@tonic-gate if (error = ipcperm_set(msq_svc, cr, &qp->msg_perm, 4847c478bd9Sstevel@tonic-gate &STRUCT_BUF(ds)->msg_perm, mdl)) { 4857c478bd9Sstevel@tonic-gate mutex_exit(lock); 4867c478bd9Sstevel@tonic-gate return (set_errno(error)); 4877c478bd9Sstevel@tonic-gate } 4887c478bd9Sstevel@tonic-gate qp->msg_qbytes = STRUCT_FGET(ds, msg_qbytes); 4897c478bd9Sstevel@tonic-gate qp->msg_ctime = gethrestime_sec(); 4907c478bd9Sstevel@tonic-gate break; 4917c478bd9Sstevel@tonic-gate 4927c478bd9Sstevel@tonic-gate case IPC_STAT: 4937c478bd9Sstevel@tonic-gate if (error = ipcperm_access(&qp->msg_perm, MSG_R, cr)) { 4947c478bd9Sstevel@tonic-gate mutex_exit(lock); 4957c478bd9Sstevel@tonic-gate return (set_errno(error)); 4967c478bd9Sstevel@tonic-gate } 4977c478bd9Sstevel@tonic-gate 4982c5b6df1Sdv142724 if (qp->msg_rcv_cnt) 4997c478bd9Sstevel@tonic-gate qp->msg_perm.ipc_mode |= MSG_RWAIT; 5007c478bd9Sstevel@tonic-gate if (qp->msg_snd_cnt) 5017c478bd9Sstevel@tonic-gate qp->msg_perm.ipc_mode |= MSG_WWAIT; 5027c478bd9Sstevel@tonic-gate ipcperm_stat(&STRUCT_BUF(ds)->msg_perm, &qp->msg_perm, mdl); 5037c478bd9Sstevel@tonic-gate qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT); 5047c478bd9Sstevel@tonic-gate STRUCT_FSETP(ds, msg_first, NULL); /* kernel addr */ 5057c478bd9Sstevel@tonic-gate STRUCT_FSETP(ds, msg_last, NULL); 5067c478bd9Sstevel@tonic-gate STRUCT_FSET(ds, msg_cbytes, qp->msg_cbytes); 5077c478bd9Sstevel@tonic-gate STRUCT_FSET(ds, msg_qnum, qp->msg_qnum); 
5087c478bd9Sstevel@tonic-gate STRUCT_FSET(ds, msg_qbytes, qp->msg_qbytes); 5097c478bd9Sstevel@tonic-gate STRUCT_FSET(ds, msg_lspid, qp->msg_lspid); 5107c478bd9Sstevel@tonic-gate STRUCT_FSET(ds, msg_lrpid, qp->msg_lrpid); 5117c478bd9Sstevel@tonic-gate STRUCT_FSET(ds, msg_stime, qp->msg_stime); 5127c478bd9Sstevel@tonic-gate STRUCT_FSET(ds, msg_rtime, qp->msg_rtime); 5137c478bd9Sstevel@tonic-gate STRUCT_FSET(ds, msg_ctime, qp->msg_ctime); 5147c478bd9Sstevel@tonic-gate break; 5157c478bd9Sstevel@tonic-gate 5167c478bd9Sstevel@tonic-gate case IPC_SET64: 5177c478bd9Sstevel@tonic-gate mutex_enter(&pp->p_lock); 5187c478bd9Sstevel@tonic-gate if ((ds64.msgx_qbytes > qp->msg_qbytes) && 5197c478bd9Sstevel@tonic-gate secpolicy_ipc_config(cr) != 0 && 5207c478bd9Sstevel@tonic-gate rctl_test(rc_process_msgmnb, pp->p_rctls, pp, 5217c478bd9Sstevel@tonic-gate ds64.msgx_qbytes, RCA_SAFE) & RCT_DENY) { 5227c478bd9Sstevel@tonic-gate mutex_exit(&pp->p_lock); 5237c478bd9Sstevel@tonic-gate mutex_exit(lock); 5247c478bd9Sstevel@tonic-gate return (set_errno(EPERM)); 5257c478bd9Sstevel@tonic-gate } 5267c478bd9Sstevel@tonic-gate mutex_exit(&pp->p_lock); 5277c478bd9Sstevel@tonic-gate if (error = ipcperm_set64(msq_svc, cr, &qp->msg_perm, 5287c478bd9Sstevel@tonic-gate &ds64.msgx_perm)) { 5297c478bd9Sstevel@tonic-gate mutex_exit(lock); 5307c478bd9Sstevel@tonic-gate return (set_errno(error)); 5317c478bd9Sstevel@tonic-gate } 5327c478bd9Sstevel@tonic-gate qp->msg_qbytes = ds64.msgx_qbytes; 5337c478bd9Sstevel@tonic-gate qp->msg_ctime = gethrestime_sec(); 5347c478bd9Sstevel@tonic-gate break; 5357c478bd9Sstevel@tonic-gate 5367c478bd9Sstevel@tonic-gate case IPC_STAT64: 5372c5b6df1Sdv142724 if (qp->msg_rcv_cnt) 5387c478bd9Sstevel@tonic-gate qp->msg_perm.ipc_mode |= MSG_RWAIT; 5397c478bd9Sstevel@tonic-gate if (qp->msg_snd_cnt) 5407c478bd9Sstevel@tonic-gate qp->msg_perm.ipc_mode |= MSG_WWAIT; 5417c478bd9Sstevel@tonic-gate ipcperm_stat64(&ds64.msgx_perm, &qp->msg_perm); 5427c478bd9Sstevel@tonic-gate 
qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT); 5437c478bd9Sstevel@tonic-gate ds64.msgx_cbytes = qp->msg_cbytes; 5447c478bd9Sstevel@tonic-gate ds64.msgx_qnum = qp->msg_qnum; 5457c478bd9Sstevel@tonic-gate ds64.msgx_qbytes = qp->msg_qbytes; 5467c478bd9Sstevel@tonic-gate ds64.msgx_lspid = qp->msg_lspid; 5477c478bd9Sstevel@tonic-gate ds64.msgx_lrpid = qp->msg_lrpid; 5487c478bd9Sstevel@tonic-gate ds64.msgx_stime = qp->msg_stime; 5497c478bd9Sstevel@tonic-gate ds64.msgx_rtime = qp->msg_rtime; 5507c478bd9Sstevel@tonic-gate ds64.msgx_ctime = qp->msg_ctime; 5517c478bd9Sstevel@tonic-gate break; 5527c478bd9Sstevel@tonic-gate 5537c478bd9Sstevel@tonic-gate default: 5547c478bd9Sstevel@tonic-gate mutex_exit(lock); 5557c478bd9Sstevel@tonic-gate return (set_errno(EINVAL)); 5567c478bd9Sstevel@tonic-gate } 5577c478bd9Sstevel@tonic-gate 5587c478bd9Sstevel@tonic-gate mutex_exit(lock); 5597c478bd9Sstevel@tonic-gate 5607c478bd9Sstevel@tonic-gate /* 5617c478bd9Sstevel@tonic-gate * Do copyout last (after releasing mutex). 5627c478bd9Sstevel@tonic-gate */ 5637c478bd9Sstevel@tonic-gate switch (cmd) { 5647c478bd9Sstevel@tonic-gate case IPC_STAT: 5657c478bd9Sstevel@tonic-gate if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds))) 5667c478bd9Sstevel@tonic-gate return (set_errno(EFAULT)); 5677c478bd9Sstevel@tonic-gate break; 5687c478bd9Sstevel@tonic-gate 5697c478bd9Sstevel@tonic-gate case IPC_STAT64: 5707c478bd9Sstevel@tonic-gate if (copyout(&ds64, arg, sizeof (struct msqid_ds64))) 5717c478bd9Sstevel@tonic-gate return (set_errno(EFAULT)); 5727c478bd9Sstevel@tonic-gate break; 5737c478bd9Sstevel@tonic-gate } 5747c478bd9Sstevel@tonic-gate 5757c478bd9Sstevel@tonic-gate return (0); 5767c478bd9Sstevel@tonic-gate } 5777c478bd9Sstevel@tonic-gate 5787c478bd9Sstevel@tonic-gate /* 5797c478bd9Sstevel@tonic-gate * Remove all message queues associated with a given zone. Called by 5807c478bd9Sstevel@tonic-gate * zone_shutdown when the zone is halted. 
5817c478bd9Sstevel@tonic-gate */ 5827c478bd9Sstevel@tonic-gate /*ARGSUSED1*/ 5837c478bd9Sstevel@tonic-gate static void 5847c478bd9Sstevel@tonic-gate msg_remove_zone(zoneid_t zoneid, void *arg) 5857c478bd9Sstevel@tonic-gate { 5867c478bd9Sstevel@tonic-gate ipc_remove_zone(msq_svc, zoneid); 5877c478bd9Sstevel@tonic-gate } 5887c478bd9Sstevel@tonic-gate 5897c478bd9Sstevel@tonic-gate /* 5907c478bd9Sstevel@tonic-gate * msgget system call. 5917c478bd9Sstevel@tonic-gate */ 5927c478bd9Sstevel@tonic-gate static int 5937c478bd9Sstevel@tonic-gate msgget(key_t key, int msgflg) 5947c478bd9Sstevel@tonic-gate { 5957c478bd9Sstevel@tonic-gate kmsqid_t *qp; 5967c478bd9Sstevel@tonic-gate kmutex_t *lock; 5977c478bd9Sstevel@tonic-gate int id, error; 598b2eb1770Sudpa int ii; 5997c478bd9Sstevel@tonic-gate proc_t *pp = curproc; 6007c478bd9Sstevel@tonic-gate 6017c478bd9Sstevel@tonic-gate top: 6027c478bd9Sstevel@tonic-gate if (error = ipc_get(msq_svc, key, msgflg, (kipc_perm_t **)&qp, &lock)) 6037c478bd9Sstevel@tonic-gate return (set_errno(error)); 6047c478bd9Sstevel@tonic-gate 6057c478bd9Sstevel@tonic-gate if (IPC_FREE(&qp->msg_perm)) { 6067c478bd9Sstevel@tonic-gate mutex_exit(lock); 6077c478bd9Sstevel@tonic-gate mutex_exit(&pp->p_lock); 6087c478bd9Sstevel@tonic-gate 6097c478bd9Sstevel@tonic-gate list_create(&qp->msg_list, sizeof (struct msg), 6107c478bd9Sstevel@tonic-gate offsetof(struct msg, msg_node)); 6117c478bd9Sstevel@tonic-gate qp->msg_qnum = 0; 6127c478bd9Sstevel@tonic-gate qp->msg_lspid = qp->msg_lrpid = 0; 6137c478bd9Sstevel@tonic-gate qp->msg_stime = qp->msg_rtime = 0; 6147c478bd9Sstevel@tonic-gate qp->msg_ctime = gethrestime_sec(); 6152c5b6df1Sdv142724 qp->msg_ngt_cnt = 0; 6162c5b6df1Sdv142724 qp->msg_neg_copy = 0; 6172c5b6df1Sdv142724 for (ii = 0; ii <= MSG_MAX_QNUM; ii++) { 6182c5b6df1Sdv142724 list_create(&qp->msg_wait_snd[ii], 6192c5b6df1Sdv142724 sizeof (msgq_wakeup_t), 6202c5b6df1Sdv142724 offsetof(msgq_wakeup_t, msgw_list)); 6212c5b6df1Sdv142724 
list_create(&qp->msg_wait_snd_ngt[ii], 6222c5b6df1Sdv142724 sizeof (msgq_wakeup_t), 6232c5b6df1Sdv142724 offsetof(msgq_wakeup_t, msgw_list)); 6242c5b6df1Sdv142724 } 6252c5b6df1Sdv142724 /* 6262c5b6df1Sdv142724 * The proper initialization of msg_lowest_type is to the 6272c5b6df1Sdv142724 * highest possible value. By doing this we guarantee that 6282c5b6df1Sdv142724 * when the first send happens, the lowest type will be set 6292c5b6df1Sdv142724 * properly. 6302c5b6df1Sdv142724 */ 631eb9fe4caSDavid Valin qp->msg_lowest_type = MSG_SMALL_INIT; 6322c5b6df1Sdv142724 list_create(&qp->msg_cpy_block, 6332c5b6df1Sdv142724 sizeof (msgq_wakeup_t), 6342c5b6df1Sdv142724 offsetof(msgq_wakeup_t, msgw_list)); 635eb9fe4caSDavid Valin list_create(&qp->msg_wait_rcv, 636eb9fe4caSDavid Valin sizeof (msgq_wakeup_t), 637eb9fe4caSDavid Valin offsetof(msgq_wakeup_t, msgw_list)); 6382c5b6df1Sdv142724 qp->msg_fnd_sndr = &msg_fnd_sndr[0]; 6392c5b6df1Sdv142724 qp->msg_fnd_rdr = &msg_fnd_rdr[0]; 6402c5b6df1Sdv142724 qp->msg_rcv_cnt = 0; 641b2eb1770Sudpa qp->msg_snd_cnt = 0; 642eb9fe4caSDavid Valin qp->msg_snd_smallest = MSG_SMALL_INIT; 6437c478bd9Sstevel@tonic-gate 6447c478bd9Sstevel@tonic-gate if (error = ipc_commit_begin(msq_svc, key, msgflg, 6457c478bd9Sstevel@tonic-gate (kipc_perm_t *)qp)) { 6467c478bd9Sstevel@tonic-gate if (error == EAGAIN) 6477c478bd9Sstevel@tonic-gate goto top; 6487c478bd9Sstevel@tonic-gate return (set_errno(error)); 6497c478bd9Sstevel@tonic-gate } 6507c478bd9Sstevel@tonic-gate qp->msg_qbytes = rctl_enforced_value(rc_process_msgmnb, 6517c478bd9Sstevel@tonic-gate pp->p_rctls, pp); 6527c478bd9Sstevel@tonic-gate qp->msg_qmax = rctl_enforced_value(rc_process_msgtql, 6537c478bd9Sstevel@tonic-gate pp->p_rctls, pp); 6547c478bd9Sstevel@tonic-gate lock = ipc_commit_end(msq_svc, &qp->msg_perm); 6557c478bd9Sstevel@tonic-gate } 656*005d3febSMarek Pospisil 657*005d3febSMarek Pospisil if (AU_AUDITING()) 6587c478bd9Sstevel@tonic-gate audit_ipcget(AT_IPC_MSG, (void *)qp); 
659*005d3febSMarek Pospisil 6607c478bd9Sstevel@tonic-gate id = qp->msg_perm.ipc_id; 6617c478bd9Sstevel@tonic-gate mutex_exit(lock); 6627c478bd9Sstevel@tonic-gate return (id); 6637c478bd9Sstevel@tonic-gate } 6647c478bd9Sstevel@tonic-gate 6657c478bd9Sstevel@tonic-gate static ssize_t 6667c478bd9Sstevel@tonic-gate msgrcv(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, long msgtyp, int msgflg) 6677c478bd9Sstevel@tonic-gate { 6687c478bd9Sstevel@tonic-gate struct msg *smp; /* ptr to best msg on q */ 6697c478bd9Sstevel@tonic-gate kmsqid_t *qp; /* ptr to associated q */ 6707c478bd9Sstevel@tonic-gate kmutex_t *lock; 6717c478bd9Sstevel@tonic-gate size_t xtsz; /* transfer byte count */ 6722c5b6df1Sdv142724 int error = 0; 6737c478bd9Sstevel@tonic-gate int cvres; 674e5994f96Sdv142724 uint_t msg_hash; 6752c5b6df1Sdv142724 msgq_wakeup_t msg_entry; 6767c478bd9Sstevel@tonic-gate 6777c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(CPU, sys, msg, 1); /* bump msg send/rcv count */ 6787c478bd9Sstevel@tonic-gate 6792c5b6df1Sdv142724 msg_hash = msg_type_hash(msgtyp); 6802c5b6df1Sdv142724 if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL) { 6817c478bd9Sstevel@tonic-gate return ((ssize_t)set_errno(EINVAL)); 6822c5b6df1Sdv142724 } 6837c478bd9Sstevel@tonic-gate ipc_hold(msq_svc, (kipc_perm_t *)qp); 6847c478bd9Sstevel@tonic-gate 6852c5b6df1Sdv142724 if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED())) { 6867c478bd9Sstevel@tonic-gate goto msgrcv_out; 6872c5b6df1Sdv142724 } 6887c478bd9Sstevel@tonic-gate 6892c5b6df1Sdv142724 /* 6902c5b6df1Sdv142724 * Various information (including the condvar_t) required for the 6912c5b6df1Sdv142724 * process to sleep is provided by it's stack. 
6922c5b6df1Sdv142724 */ 6932c5b6df1Sdv142724 msg_entry.msgw_thrd = curthread; 6942c5b6df1Sdv142724 msg_entry.msgw_snd_wake = 0; 6952c5b6df1Sdv142724 msg_entry.msgw_type = msgtyp; 6967c478bd9Sstevel@tonic-gate findmsg: 6972c5b6df1Sdv142724 smp = msgrcv_lookup(qp, msgtyp); 6987c478bd9Sstevel@tonic-gate 6997c478bd9Sstevel@tonic-gate if (smp) { 7007c478bd9Sstevel@tonic-gate /* 7012c5b6df1Sdv142724 * We found a possible message to copy out. 7027c478bd9Sstevel@tonic-gate */ 7037c478bd9Sstevel@tonic-gate if ((smp->msg_flags & MSG_RCVCOPY) == 0) { 704e5994f96Sdv142724 long t = msg_entry.msgw_snd_wake; 7056344fb25Sqiao long copy_type = smp->msg_type; 7066344fb25Sqiao 7077c478bd9Sstevel@tonic-gate /* 7082c5b6df1Sdv142724 * It is available, attempt to copy it. 7097c478bd9Sstevel@tonic-gate */ 7102c5b6df1Sdv142724 error = msg_copyout(qp, msgtyp, &lock, &xtsz, msgsz, 7112c5b6df1Sdv142724 smp, msgp, msgflg); 712e5994f96Sdv142724 713e5994f96Sdv142724 /* 714e5994f96Sdv142724 * It is possible to consume a different message 715e5994f96Sdv142724 * type then what originally awakened for (negative 716e5994f96Sdv142724 * types). If this happens a check must be done to 717e5994f96Sdv142724 * to determine if another receiver is available 718e5994f96Sdv142724 * for the waking message type, Failure to do this 719e5994f96Sdv142724 * can result in a message on the queue that can be 720e5994f96Sdv142724 * serviced by a sleeping receiver. 721e5994f96Sdv142724 */ 7226344fb25Sqiao if (!error && t && (copy_type != t)) 723e5994f96Sdv142724 msg_wakeup_rdr(qp, &qp->msg_fnd_sndr, t); 724e5994f96Sdv142724 7252c5b6df1Sdv142724 /* 7262c5b6df1Sdv142724 * Don't forget to wakeup a sleeper that blocked because 7272c5b6df1Sdv142724 * we were copying things out. 7282c5b6df1Sdv142724 */ 7292c5b6df1Sdv142724 msg_wakeup_rdr(qp, &qp->msg_fnd_rdr, 0); 7302c5b6df1Sdv142724 goto msgrcv_out; 7312c5b6df1Sdv142724 } 7322c5b6df1Sdv142724 /* 7332c5b6df1Sdv142724 * The selected message is being copied out, so block. 
We do 7342c5b6df1Sdv142724 * not need to wake the next person up on the msg_cpy_block list 7352c5b6df1Sdv142724 * due to the fact some one is copying out and they will get 7362c5b6df1Sdv142724 * things moving again once the copy is completed. 7372c5b6df1Sdv142724 */ 7382c5b6df1Sdv142724 cvres = msg_rcvq_sleep(&qp->msg_cpy_block, 7392c5b6df1Sdv142724 &msg_entry, &lock, qp); 7402c5b6df1Sdv142724 error = msgq_check_err(qp, cvres); 7412c5b6df1Sdv142724 if (error) { 7422c5b6df1Sdv142724 goto msgrcv_out; 7432c5b6df1Sdv142724 } 7442c5b6df1Sdv142724 goto findmsg; 7452c5b6df1Sdv142724 } 7462c5b6df1Sdv142724 /* 7472c5b6df1Sdv142724 * There isn't a message to copy out that matches the designated 7482c5b6df1Sdv142724 * criteria. 7492c5b6df1Sdv142724 */ 7502c5b6df1Sdv142724 if (msgflg & IPC_NOWAIT) { 7512c5b6df1Sdv142724 error = ENOMSG; 7522c5b6df1Sdv142724 goto msgrcv_out; 7532c5b6df1Sdv142724 } 7542c5b6df1Sdv142724 msg_wakeup_rdr(qp, &qp->msg_fnd_rdr, 0); 7552c5b6df1Sdv142724 7562c5b6df1Sdv142724 /* 7572c5b6df1Sdv142724 * Wait for new message. We keep the negative and positive types 7582c5b6df1Sdv142724 * separate for performance reasons. 
7592c5b6df1Sdv142724 */ 7602c5b6df1Sdv142724 msg_entry.msgw_snd_wake = 0; 7612c5b6df1Sdv142724 if (msgtyp >= 0) { 7622c5b6df1Sdv142724 cvres = msg_rcvq_sleep(&qp->msg_wait_snd[msg_hash], 7632c5b6df1Sdv142724 &msg_entry, &lock, qp); 7642c5b6df1Sdv142724 } else { 7652c5b6df1Sdv142724 qp->msg_ngt_cnt++; 7662c5b6df1Sdv142724 cvres = msg_rcvq_sleep(&qp->msg_wait_snd_ngt[msg_hash], 7672c5b6df1Sdv142724 &msg_entry, &lock, qp); 7682c5b6df1Sdv142724 qp->msg_ngt_cnt--; 7692c5b6df1Sdv142724 } 7702c5b6df1Sdv142724 7712c5b6df1Sdv142724 if (!(error = msgq_check_err(qp, cvres))) { 7722c5b6df1Sdv142724 goto findmsg; 7732c5b6df1Sdv142724 } 7742c5b6df1Sdv142724 7752c5b6df1Sdv142724 msgrcv_out: 7762c5b6df1Sdv142724 if (error) { 7772c5b6df1Sdv142724 msg_wakeup_rdr(qp, &qp->msg_fnd_rdr, 0); 7782c5b6df1Sdv142724 if (msg_entry.msgw_snd_wake) { 7792c5b6df1Sdv142724 msg_wakeup_rdr(qp, &qp->msg_fnd_sndr, 7802c5b6df1Sdv142724 msg_entry.msgw_snd_wake); 7812c5b6df1Sdv142724 } 7822c5b6df1Sdv142724 ipc_rele(msq_svc, (kipc_perm_t *)qp); 7832c5b6df1Sdv142724 return ((ssize_t)set_errno(error)); 7842c5b6df1Sdv142724 } 7852c5b6df1Sdv142724 ipc_rele(msq_svc, (kipc_perm_t *)qp); 7862c5b6df1Sdv142724 return ((ssize_t)xtsz); 7872c5b6df1Sdv142724 } 7882c5b6df1Sdv142724 7892c5b6df1Sdv142724 static int 7902c5b6df1Sdv142724 msgq_check_err(kmsqid_t *qp, int cvres) 7912c5b6df1Sdv142724 { 7922c5b6df1Sdv142724 if (IPC_FREE(&qp->msg_perm)) { 7932c5b6df1Sdv142724 return (EIDRM); 7942c5b6df1Sdv142724 } 7952c5b6df1Sdv142724 7962c5b6df1Sdv142724 if (cvres == 0) { 7972c5b6df1Sdv142724 return (EINTR); 7982c5b6df1Sdv142724 } 7992c5b6df1Sdv142724 8002c5b6df1Sdv142724 return (0); 8012c5b6df1Sdv142724 } 8022c5b6df1Sdv142724 8032c5b6df1Sdv142724 static int 8042c5b6df1Sdv142724 msg_copyout(kmsqid_t *qp, long msgtyp, kmutex_t **lock, size_t *xtsz_ret, 8052c5b6df1Sdv142724 size_t msgsz, struct msg *smp, struct ipcmsgbuf *msgp, int msgflg) 8062c5b6df1Sdv142724 { 8072c5b6df1Sdv142724 size_t xtsz; 8082c5b6df1Sdv142724 
STRUCT_HANDLE(ipcmsgbuf, umsgp); 8092c5b6df1Sdv142724 model_t mdl = get_udatamodel(); 8102c5b6df1Sdv142724 int copyerror = 0; 8112c5b6df1Sdv142724 8122c5b6df1Sdv142724 STRUCT_SET_HANDLE(umsgp, mdl, msgp); 8137c478bd9Sstevel@tonic-gate if (msgsz < smp->msg_size) { 8147c478bd9Sstevel@tonic-gate if ((msgflg & MSG_NOERROR) == 0) { 8152c5b6df1Sdv142724 return (E2BIG); 8167c478bd9Sstevel@tonic-gate } else { 8177c478bd9Sstevel@tonic-gate xtsz = msgsz; 8187c478bd9Sstevel@tonic-gate } 8197c478bd9Sstevel@tonic-gate } else { 8207c478bd9Sstevel@tonic-gate xtsz = smp->msg_size; 8217c478bd9Sstevel@tonic-gate } 8222c5b6df1Sdv142724 *xtsz_ret = xtsz; 8237c478bd9Sstevel@tonic-gate 8247c478bd9Sstevel@tonic-gate /* 8252c5b6df1Sdv142724 * To prevent a DOS attack we mark the message as being 8262c5b6df1Sdv142724 * copied out and release mutex. When the copy is completed 8272c5b6df1Sdv142724 * we need to acquire the mutex and make the appropriate updates. 8287c478bd9Sstevel@tonic-gate */ 8297c478bd9Sstevel@tonic-gate ASSERT((smp->msg_flags & MSG_RCVCOPY) == 0); 8307c478bd9Sstevel@tonic-gate smp->msg_flags |= MSG_RCVCOPY; 8317c478bd9Sstevel@tonic-gate msg_hold(smp); 8322c5b6df1Sdv142724 if (msgtyp < 0) { 8332c5b6df1Sdv142724 ASSERT(qp->msg_neg_copy == 0); 8342c5b6df1Sdv142724 qp->msg_neg_copy = 1; 8352c5b6df1Sdv142724 } 8362c5b6df1Sdv142724 mutex_exit(*lock); 8377c478bd9Sstevel@tonic-gate 8387c478bd9Sstevel@tonic-gate if (mdl == DATAMODEL_NATIVE) { 8397c478bd9Sstevel@tonic-gate copyerror = copyout(&smp->msg_type, msgp, 8407c478bd9Sstevel@tonic-gate sizeof (smp->msg_type)); 8417c478bd9Sstevel@tonic-gate } else { 8427c478bd9Sstevel@tonic-gate /* 8437c478bd9Sstevel@tonic-gate * 32-bit callers need an imploded msg type. 
8447c478bd9Sstevel@tonic-gate */ 8457c478bd9Sstevel@tonic-gate int32_t msg_type32 = smp->msg_type; 8467c478bd9Sstevel@tonic-gate 8477c478bd9Sstevel@tonic-gate copyerror = copyout(&msg_type32, msgp, 8487c478bd9Sstevel@tonic-gate sizeof (msg_type32)); 8497c478bd9Sstevel@tonic-gate } 8507c478bd9Sstevel@tonic-gate 8512c5b6df1Sdv142724 if (copyerror == 0 && xtsz) { 8527c478bd9Sstevel@tonic-gate copyerror = copyout(smp->msg_addr, 8537c478bd9Sstevel@tonic-gate STRUCT_FADDR(umsgp, mtext), xtsz); 8542c5b6df1Sdv142724 } 8557c478bd9Sstevel@tonic-gate 8567c478bd9Sstevel@tonic-gate /* 8572c5b6df1Sdv142724 * Reclaim the mutex and make sure the message queue still exists. 8587c478bd9Sstevel@tonic-gate */ 8592c5b6df1Sdv142724 8602c5b6df1Sdv142724 *lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id); 8612c5b6df1Sdv142724 if (msgtyp < 0) { 8622c5b6df1Sdv142724 qp->msg_neg_copy = 0; 8632c5b6df1Sdv142724 } 8647c478bd9Sstevel@tonic-gate ASSERT(smp->msg_flags & MSG_RCVCOPY); 8657c478bd9Sstevel@tonic-gate smp->msg_flags &= ~MSG_RCVCOPY; 8667c478bd9Sstevel@tonic-gate msg_rele(smp); 8677c478bd9Sstevel@tonic-gate if (IPC_FREE(&qp->msg_perm)) { 8682c5b6df1Sdv142724 return (EIDRM); 8697c478bd9Sstevel@tonic-gate } 8707c478bd9Sstevel@tonic-gate if (copyerror) { 8712c5b6df1Sdv142724 return (EFAULT); 8727c478bd9Sstevel@tonic-gate } 8737c478bd9Sstevel@tonic-gate qp->msg_lrpid = ttoproc(curthread)->p_pid; 8747c478bd9Sstevel@tonic-gate qp->msg_rtime = gethrestime_sec(); 8757c478bd9Sstevel@tonic-gate msgunlink(qp, smp); 8762c5b6df1Sdv142724 return (0); 8777c478bd9Sstevel@tonic-gate } 8787c478bd9Sstevel@tonic-gate 8792c5b6df1Sdv142724 static struct msg * 8802c5b6df1Sdv142724 msgrcv_lookup(kmsqid_t *qp, long msgtyp) 8812c5b6df1Sdv142724 { 8822c5b6df1Sdv142724 struct msg *smp = NULL; 883e5994f96Sdv142724 long qp_low; 8842c5b6df1Sdv142724 struct msg *mp; /* ptr to msg on q */ 885e5994f96Sdv142724 long low_msgtype; 8862c5b6df1Sdv142724 static struct msg neg_copy_smp; 8872c5b6df1Sdv142724 8882c5b6df1Sdv142724 mp 
= list_head(&qp->msg_list); 8892c5b6df1Sdv142724 if (msgtyp == 0) { 8902c5b6df1Sdv142724 smp = mp; 8912c5b6df1Sdv142724 } else { 8922c5b6df1Sdv142724 qp_low = qp->msg_lowest_type; 8932c5b6df1Sdv142724 if (msgtyp > 0) { 8942c5b6df1Sdv142724 /* 8952c5b6df1Sdv142724 * If our lowest possible message type is larger than 8962c5b6df1Sdv142724 * the message type desired, then we know there is 8972c5b6df1Sdv142724 * no entry present. 8982c5b6df1Sdv142724 */ 8992c5b6df1Sdv142724 if (qp_low > msgtyp) { 9002c5b6df1Sdv142724 return (NULL); 9012c5b6df1Sdv142724 } 9022c5b6df1Sdv142724 9032c5b6df1Sdv142724 for (; mp; mp = list_next(&qp->msg_list, mp)) { 9042c5b6df1Sdv142724 if (msgtyp == mp->msg_type) { 9052c5b6df1Sdv142724 smp = mp; 9062c5b6df1Sdv142724 break; 9072c5b6df1Sdv142724 } 9082c5b6df1Sdv142724 } 9097c478bd9Sstevel@tonic-gate } else { 9107c478bd9Sstevel@tonic-gate /* 9112c5b6df1Sdv142724 * We have kept track of the lowest possible message 9122c5b6df1Sdv142724 * type on the send queue. This allows us to terminate 9132c5b6df1Sdv142724 * the search early if we find a message type of that 9142c5b6df1Sdv142724 * type. Note, the lowest type may not be the actual 9152c5b6df1Sdv142724 * lowest value in the system, it is only guaranteed 9162c5b6df1Sdv142724 * that there isn't a value lower than that. 
9177c478bd9Sstevel@tonic-gate */ 9182c5b6df1Sdv142724 low_msgtype = -msgtyp; 919e5994f96Sdv142724 if (low_msgtype < qp_low) { 9202c5b6df1Sdv142724 return (NULL); 9212c5b6df1Sdv142724 } 9222c5b6df1Sdv142724 if (qp->msg_neg_copy) { 9232c5b6df1Sdv142724 neg_copy_smp.msg_flags = MSG_RCVCOPY; 9242c5b6df1Sdv142724 return (&neg_copy_smp); 9252c5b6df1Sdv142724 } 9262c5b6df1Sdv142724 for (; mp; mp = list_next(&qp->msg_list, mp)) { 927e5994f96Sdv142724 if (mp->msg_type <= low_msgtype && 928e5994f96Sdv142724 !(smp && smp->msg_type <= mp->msg_type)) { 9292c5b6df1Sdv142724 smp = mp; 9302c5b6df1Sdv142724 low_msgtype = mp->msg_type; 9312c5b6df1Sdv142724 if (low_msgtype == qp_low) { 9322c5b6df1Sdv142724 break; 9337c478bd9Sstevel@tonic-gate } 9347c478bd9Sstevel@tonic-gate } 9357c478bd9Sstevel@tonic-gate } 9362c5b6df1Sdv142724 if (smp) { 9372c5b6df1Sdv142724 /* 9382c5b6df1Sdv142724 * Update the lowest message type. 9392c5b6df1Sdv142724 */ 9402c5b6df1Sdv142724 qp->msg_lowest_type = smp->msg_type; 9417c478bd9Sstevel@tonic-gate } 9422c5b6df1Sdv142724 } 9432c5b6df1Sdv142724 } 9442c5b6df1Sdv142724 return (smp); 9457c478bd9Sstevel@tonic-gate } 9467c478bd9Sstevel@tonic-gate 9477c478bd9Sstevel@tonic-gate /* 9487c478bd9Sstevel@tonic-gate * msgids system call. 
9497c478bd9Sstevel@tonic-gate */ 9507c478bd9Sstevel@tonic-gate static int 9517c478bd9Sstevel@tonic-gate msgids(int *buf, uint_t nids, uint_t *pnids) 9527c478bd9Sstevel@tonic-gate { 9537c478bd9Sstevel@tonic-gate int error; 9547c478bd9Sstevel@tonic-gate 9557c478bd9Sstevel@tonic-gate if (error = ipc_ids(msq_svc, buf, nids, pnids)) 9567c478bd9Sstevel@tonic-gate return (set_errno(error)); 9577c478bd9Sstevel@tonic-gate 9587c478bd9Sstevel@tonic-gate return (0); 9597c478bd9Sstevel@tonic-gate } 9607c478bd9Sstevel@tonic-gate 9617c478bd9Sstevel@tonic-gate #define RND(x) roundup((x), sizeof (size_t)) 9627c478bd9Sstevel@tonic-gate #define RND32(x) roundup((x), sizeof (size32_t)) 9637c478bd9Sstevel@tonic-gate 9647c478bd9Sstevel@tonic-gate /* 9657c478bd9Sstevel@tonic-gate * msgsnap system call. 9667c478bd9Sstevel@tonic-gate */ 9677c478bd9Sstevel@tonic-gate static int 9687c478bd9Sstevel@tonic-gate msgsnap(int msqid, caddr_t buf, size_t bufsz, long msgtyp) 9697c478bd9Sstevel@tonic-gate { 9707c478bd9Sstevel@tonic-gate struct msg *mp; /* ptr to msg on q */ 9717c478bd9Sstevel@tonic-gate kmsqid_t *qp; /* ptr to associated q */ 9727c478bd9Sstevel@tonic-gate kmutex_t *lock; 9737c478bd9Sstevel@tonic-gate size_t size; 9747c478bd9Sstevel@tonic-gate size_t nmsg; 9757c478bd9Sstevel@tonic-gate struct msg **snaplist; 9767c478bd9Sstevel@tonic-gate int error, i; 9777c478bd9Sstevel@tonic-gate model_t mdl = get_udatamodel(); 9787c478bd9Sstevel@tonic-gate STRUCT_DECL(msgsnap_head, head); 9797c478bd9Sstevel@tonic-gate STRUCT_DECL(msgsnap_mhead, mhead); 9807c478bd9Sstevel@tonic-gate 9817c478bd9Sstevel@tonic-gate STRUCT_INIT(head, mdl); 9827c478bd9Sstevel@tonic-gate STRUCT_INIT(mhead, mdl); 9837c478bd9Sstevel@tonic-gate 9847c478bd9Sstevel@tonic-gate if (bufsz < STRUCT_SIZE(head)) 9857c478bd9Sstevel@tonic-gate return (set_errno(EINVAL)); 9867c478bd9Sstevel@tonic-gate 9877c478bd9Sstevel@tonic-gate if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL) 9887c478bd9Sstevel@tonic-gate return 
(set_errno(EINVAL)); 9897c478bd9Sstevel@tonic-gate 9907c478bd9Sstevel@tonic-gate if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED())) { 9917c478bd9Sstevel@tonic-gate mutex_exit(lock); 9927c478bd9Sstevel@tonic-gate return (set_errno(error)); 9937c478bd9Sstevel@tonic-gate } 9947c478bd9Sstevel@tonic-gate ipc_hold(msq_svc, (kipc_perm_t *)qp); 9957c478bd9Sstevel@tonic-gate 9967c478bd9Sstevel@tonic-gate /* 9977c478bd9Sstevel@tonic-gate * First compute the required buffer size and 9987c478bd9Sstevel@tonic-gate * the number of messages on the queue. 9997c478bd9Sstevel@tonic-gate */ 10007c478bd9Sstevel@tonic-gate size = nmsg = 0; 10017c478bd9Sstevel@tonic-gate for (mp = list_head(&qp->msg_list); mp; 10027c478bd9Sstevel@tonic-gate mp = list_next(&qp->msg_list, mp)) { 10037c478bd9Sstevel@tonic-gate if (msgtyp == 0 || 10047c478bd9Sstevel@tonic-gate (msgtyp > 0 && msgtyp == mp->msg_type) || 10057c478bd9Sstevel@tonic-gate (msgtyp < 0 && mp->msg_type <= -msgtyp)) { 10067c478bd9Sstevel@tonic-gate nmsg++; 10077c478bd9Sstevel@tonic-gate if (mdl == DATAMODEL_NATIVE) 10087c478bd9Sstevel@tonic-gate size += RND(mp->msg_size); 10097c478bd9Sstevel@tonic-gate else 10107c478bd9Sstevel@tonic-gate size += RND32(mp->msg_size); 10117c478bd9Sstevel@tonic-gate } 10127c478bd9Sstevel@tonic-gate } 10137c478bd9Sstevel@tonic-gate 10147c478bd9Sstevel@tonic-gate size += STRUCT_SIZE(head) + nmsg * STRUCT_SIZE(mhead); 10157c478bd9Sstevel@tonic-gate if (size > bufsz) 10167c478bd9Sstevel@tonic-gate nmsg = 0; 10177c478bd9Sstevel@tonic-gate 10187c478bd9Sstevel@tonic-gate if (nmsg > 0) { 10197c478bd9Sstevel@tonic-gate /* 10207c478bd9Sstevel@tonic-gate * Mark the messages as being copied. 
10217c478bd9Sstevel@tonic-gate */ 10227c478bd9Sstevel@tonic-gate snaplist = (struct msg **)kmem_alloc(nmsg * 10237c478bd9Sstevel@tonic-gate sizeof (struct msg *), KM_SLEEP); 10247c478bd9Sstevel@tonic-gate i = 0; 10257c478bd9Sstevel@tonic-gate for (mp = list_head(&qp->msg_list); mp; 10267c478bd9Sstevel@tonic-gate mp = list_next(&qp->msg_list, mp)) { 10277c478bd9Sstevel@tonic-gate if (msgtyp == 0 || 10287c478bd9Sstevel@tonic-gate (msgtyp > 0 && msgtyp == mp->msg_type) || 10297c478bd9Sstevel@tonic-gate (msgtyp < 0 && mp->msg_type <= -msgtyp)) { 10307c478bd9Sstevel@tonic-gate msg_hold(mp); 10317c478bd9Sstevel@tonic-gate snaplist[i] = mp; 10327c478bd9Sstevel@tonic-gate i++; 10337c478bd9Sstevel@tonic-gate } 10347c478bd9Sstevel@tonic-gate } 10357c478bd9Sstevel@tonic-gate } 10367c478bd9Sstevel@tonic-gate mutex_exit(lock); 10377c478bd9Sstevel@tonic-gate 10387c478bd9Sstevel@tonic-gate /* 10397c478bd9Sstevel@tonic-gate * Copy out the buffer header. 10407c478bd9Sstevel@tonic-gate */ 10417c478bd9Sstevel@tonic-gate STRUCT_FSET(head, msgsnap_size, size); 10427c478bd9Sstevel@tonic-gate STRUCT_FSET(head, msgsnap_nmsg, nmsg); 10437c478bd9Sstevel@tonic-gate if (copyout(STRUCT_BUF(head), buf, STRUCT_SIZE(head))) 10447c478bd9Sstevel@tonic-gate error = EFAULT; 10457c478bd9Sstevel@tonic-gate 10467c478bd9Sstevel@tonic-gate buf += STRUCT_SIZE(head); 10477c478bd9Sstevel@tonic-gate 10487c478bd9Sstevel@tonic-gate /* 10497c478bd9Sstevel@tonic-gate * Now copy out the messages one by one. 
10507c478bd9Sstevel@tonic-gate */ 10517c478bd9Sstevel@tonic-gate for (i = 0; i < nmsg; i++) { 10527c478bd9Sstevel@tonic-gate mp = snaplist[i]; 10537c478bd9Sstevel@tonic-gate if (error == 0) { 10547c478bd9Sstevel@tonic-gate STRUCT_FSET(mhead, msgsnap_mlen, mp->msg_size); 10557c478bd9Sstevel@tonic-gate STRUCT_FSET(mhead, msgsnap_mtype, mp->msg_type); 10567c478bd9Sstevel@tonic-gate if (copyout(STRUCT_BUF(mhead), buf, STRUCT_SIZE(mhead))) 10577c478bd9Sstevel@tonic-gate error = EFAULT; 10587c478bd9Sstevel@tonic-gate buf += STRUCT_SIZE(mhead); 10597c478bd9Sstevel@tonic-gate 10607c478bd9Sstevel@tonic-gate if (error == 0 && 10617c478bd9Sstevel@tonic-gate mp->msg_size != 0 && 10627c478bd9Sstevel@tonic-gate copyout(mp->msg_addr, buf, mp->msg_size)) 10637c478bd9Sstevel@tonic-gate error = EFAULT; 10647c478bd9Sstevel@tonic-gate if (mdl == DATAMODEL_NATIVE) 10657c478bd9Sstevel@tonic-gate buf += RND(mp->msg_size); 10667c478bd9Sstevel@tonic-gate else 10677c478bd9Sstevel@tonic-gate buf += RND32(mp->msg_size); 10687c478bd9Sstevel@tonic-gate } 10697c478bd9Sstevel@tonic-gate lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id); 10707c478bd9Sstevel@tonic-gate msg_rele(mp); 10717c478bd9Sstevel@tonic-gate /* Check for msg q deleted or reallocated */ 10727c478bd9Sstevel@tonic-gate if (IPC_FREE(&qp->msg_perm)) 10737c478bd9Sstevel@tonic-gate error = EIDRM; 10747c478bd9Sstevel@tonic-gate mutex_exit(lock); 10757c478bd9Sstevel@tonic-gate } 10767c478bd9Sstevel@tonic-gate 10777c478bd9Sstevel@tonic-gate (void) ipc_lock(msq_svc, qp->msg_perm.ipc_id); 10787c478bd9Sstevel@tonic-gate ipc_rele(msq_svc, (kipc_perm_t *)qp); 10797c478bd9Sstevel@tonic-gate 10807c478bd9Sstevel@tonic-gate if (nmsg > 0) 10817c478bd9Sstevel@tonic-gate kmem_free(snaplist, nmsg * sizeof (struct msg *)); 10827c478bd9Sstevel@tonic-gate 10837c478bd9Sstevel@tonic-gate if (error) 10847c478bd9Sstevel@tonic-gate return (set_errno(error)); 10857c478bd9Sstevel@tonic-gate return (0); 10867c478bd9Sstevel@tonic-gate } 
10877c478bd9Sstevel@tonic-gate 1088e50383f4Sdv142724 #define MSG_PREALLOC_LIMIT 8192 1089e50383f4Sdv142724 10907c478bd9Sstevel@tonic-gate /* 10917c478bd9Sstevel@tonic-gate * msgsnd system call. 10927c478bd9Sstevel@tonic-gate */ 10937c478bd9Sstevel@tonic-gate static int 10947c478bd9Sstevel@tonic-gate msgsnd(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, int msgflg) 10957c478bd9Sstevel@tonic-gate { 10967c478bd9Sstevel@tonic-gate kmsqid_t *qp; 1097e50383f4Sdv142724 kmutex_t *lock = NULL; 10987c478bd9Sstevel@tonic-gate struct msg *mp = NULL; 10997c478bd9Sstevel@tonic-gate long type; 1100eb9fe4caSDavid Valin int error = 0, wait_wakeup = 0; 1101eb9fe4caSDavid Valin msgq_wakeup_t msg_entry; 11027c478bd9Sstevel@tonic-gate model_t mdl = get_udatamodel(); 11037c478bd9Sstevel@tonic-gate STRUCT_HANDLE(ipcmsgbuf, umsgp); 11047c478bd9Sstevel@tonic-gate 11057c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(CPU, sys, msg, 1); /* bump msg send/rcv count */ 11067c478bd9Sstevel@tonic-gate STRUCT_SET_HANDLE(umsgp, mdl, msgp); 11077c478bd9Sstevel@tonic-gate 11087c478bd9Sstevel@tonic-gate if (mdl == DATAMODEL_NATIVE) { 11097c478bd9Sstevel@tonic-gate if (copyin(msgp, &type, sizeof (type))) 11107c478bd9Sstevel@tonic-gate return (set_errno(EFAULT)); 11117c478bd9Sstevel@tonic-gate } else { 11127c478bd9Sstevel@tonic-gate int32_t type32; 11137c478bd9Sstevel@tonic-gate if (copyin(msgp, &type32, sizeof (type32))) 11147c478bd9Sstevel@tonic-gate return (set_errno(EFAULT)); 11157c478bd9Sstevel@tonic-gate type = type32; 11167c478bd9Sstevel@tonic-gate } 11177c478bd9Sstevel@tonic-gate 11187c478bd9Sstevel@tonic-gate if (type < 1) 11197c478bd9Sstevel@tonic-gate return (set_errno(EINVAL)); 11207c478bd9Sstevel@tonic-gate 1121e50383f4Sdv142724 /* 1122e50383f4Sdv142724 * We want the value here large enough that most of the 1123e50383f4Sdv142724 * the message operations will use the "lockless" path, 1124e50383f4Sdv142724 * but small enough that a user can not reserve large 1125e50383f4Sdv142724 * chunks of kernel 
memory unless they have a valid 1126e50383f4Sdv142724 * reason to. 1127e50383f4Sdv142724 */ 1128e50383f4Sdv142724 if (msgsz <= MSG_PREALLOC_LIMIT) { 1129e50383f4Sdv142724 /* 1130e50383f4Sdv142724 * We are small enough that we can afford to do the 1131e50383f4Sdv142724 * allocation now. This saves dropping the lock 1132e50383f4Sdv142724 * and then reacquiring the lock. 1133e50383f4Sdv142724 */ 1134e50383f4Sdv142724 mp = kmem_zalloc(sizeof (struct msg), KM_SLEEP); 1135e50383f4Sdv142724 mp->msg_copycnt = 1; 1136e50383f4Sdv142724 mp->msg_size = msgsz; 1137e50383f4Sdv142724 if (msgsz) { 1138e50383f4Sdv142724 mp->msg_addr = kmem_alloc(msgsz, KM_SLEEP); 1139e50383f4Sdv142724 if (copyin(STRUCT_FADDR(umsgp, mtext), 1140e50383f4Sdv142724 mp->msg_addr, msgsz) == -1) { 1141e50383f4Sdv142724 error = EFAULT; 1142e50383f4Sdv142724 goto msgsnd_out; 1143e50383f4Sdv142724 } 1144e50383f4Sdv142724 } 1145e50383f4Sdv142724 } 1146e50383f4Sdv142724 1147e50383f4Sdv142724 if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL) { 1148e50383f4Sdv142724 error = EINVAL; 1149e50383f4Sdv142724 goto msgsnd_out; 1150e50383f4Sdv142724 } 1151e50383f4Sdv142724 11527c478bd9Sstevel@tonic-gate ipc_hold(msq_svc, (kipc_perm_t *)qp); 11537c478bd9Sstevel@tonic-gate 11547c478bd9Sstevel@tonic-gate if (msgsz > qp->msg_qbytes) { 11557c478bd9Sstevel@tonic-gate error = EINVAL; 11567c478bd9Sstevel@tonic-gate goto msgsnd_out; 11577c478bd9Sstevel@tonic-gate } 11587c478bd9Sstevel@tonic-gate 11597c478bd9Sstevel@tonic-gate if (error = ipcperm_access(&qp->msg_perm, MSG_W, CRED())) 11607c478bd9Sstevel@tonic-gate goto msgsnd_out; 11617c478bd9Sstevel@tonic-gate 11627c478bd9Sstevel@tonic-gate top: 11637c478bd9Sstevel@tonic-gate /* 11647c478bd9Sstevel@tonic-gate * Allocate space on q, message header, & buffer space. 
11657c478bd9Sstevel@tonic-gate */ 11667c478bd9Sstevel@tonic-gate ASSERT(qp->msg_qnum <= qp->msg_qmax); 11677c478bd9Sstevel@tonic-gate while ((msgsz > qp->msg_qbytes - qp->msg_cbytes) || 11687c478bd9Sstevel@tonic-gate (qp->msg_qnum == qp->msg_qmax)) { 11697c478bd9Sstevel@tonic-gate int cvres; 11707c478bd9Sstevel@tonic-gate 11717c478bd9Sstevel@tonic-gate if (msgflg & IPC_NOWAIT) { 11727c478bd9Sstevel@tonic-gate error = EAGAIN; 11737c478bd9Sstevel@tonic-gate goto msgsnd_out; 11747c478bd9Sstevel@tonic-gate } 11757c478bd9Sstevel@tonic-gate 1176eb9fe4caSDavid Valin wait_wakeup = 0; 11777c478bd9Sstevel@tonic-gate qp->msg_snd_cnt++; 1178eb9fe4caSDavid Valin msg_entry.msgw_snd_size = msgsz; 1179eb9fe4caSDavid Valin msg_entry.msgw_thrd = curthread; 1180eb9fe4caSDavid Valin msg_entry.msgw_type = type; 1181eb9fe4caSDavid Valin cv_init(&msg_entry.msgw_wake_cv, NULL, 0, NULL); 1182eb9fe4caSDavid Valin list_insert_tail(&qp->msg_wait_rcv, &msg_entry); 1183eb9fe4caSDavid Valin if (qp->msg_snd_smallest > msgsz) 1184eb9fe4caSDavid Valin qp->msg_snd_smallest = msgsz; 1185eb9fe4caSDavid Valin cvres = cv_wait_sig(&msg_entry.msgw_wake_cv, lock); 11867c478bd9Sstevel@tonic-gate lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, lock); 11877c478bd9Sstevel@tonic-gate qp->msg_snd_cnt--; 1188eb9fe4caSDavid Valin if (list_link_active(&msg_entry.msgw_list)) 1189eb9fe4caSDavid Valin list_remove(&qp->msg_wait_rcv, &msg_entry); 11902c5b6df1Sdv142724 if (error = msgq_check_err(qp, cvres)) { 11917c478bd9Sstevel@tonic-gate goto msgsnd_out; 11927c478bd9Sstevel@tonic-gate } 1193eb9fe4caSDavid Valin wait_wakeup = 1; 11947c478bd9Sstevel@tonic-gate } 11957c478bd9Sstevel@tonic-gate 11967c478bd9Sstevel@tonic-gate if (mp == NULL) { 11977c478bd9Sstevel@tonic-gate int failure; 11987c478bd9Sstevel@tonic-gate 11997c478bd9Sstevel@tonic-gate mutex_exit(lock); 1200e50383f4Sdv142724 ASSERT(msgsz > 0); 12017c478bd9Sstevel@tonic-gate mp = kmem_zalloc(sizeof (struct msg), KM_SLEEP); 1202e50383f4Sdv142724 mp->msg_addr = 
kmem_alloc(msgsz, KM_SLEEP); 12037c478bd9Sstevel@tonic-gate mp->msg_size = msgsz; 12047c478bd9Sstevel@tonic-gate mp->msg_copycnt = 1; 12057c478bd9Sstevel@tonic-gate 1206e50383f4Sdv142724 failure = (copyin(STRUCT_FADDR(umsgp, mtext), 12077c478bd9Sstevel@tonic-gate mp->msg_addr, msgsz) == -1); 12087c478bd9Sstevel@tonic-gate lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id); 12097c478bd9Sstevel@tonic-gate if (IPC_FREE(&qp->msg_perm)) { 12107c478bd9Sstevel@tonic-gate error = EIDRM; 12117c478bd9Sstevel@tonic-gate goto msgsnd_out; 12127c478bd9Sstevel@tonic-gate } 12137c478bd9Sstevel@tonic-gate if (failure) { 12147c478bd9Sstevel@tonic-gate error = EFAULT; 12157c478bd9Sstevel@tonic-gate goto msgsnd_out; 12167c478bd9Sstevel@tonic-gate } 12177c478bd9Sstevel@tonic-gate goto top; 12187c478bd9Sstevel@tonic-gate } 12197c478bd9Sstevel@tonic-gate 12207c478bd9Sstevel@tonic-gate /* 12217c478bd9Sstevel@tonic-gate * Everything is available, put msg on q. 12227c478bd9Sstevel@tonic-gate */ 12237c478bd9Sstevel@tonic-gate qp->msg_qnum++; 12247c478bd9Sstevel@tonic-gate qp->msg_cbytes += msgsz; 12257c478bd9Sstevel@tonic-gate qp->msg_lspid = curproc->p_pid; 12267c478bd9Sstevel@tonic-gate qp->msg_stime = gethrestime_sec(); 12277c478bd9Sstevel@tonic-gate mp->msg_type = type; 12282c5b6df1Sdv142724 if (qp->msg_lowest_type > type) 12292c5b6df1Sdv142724 qp->msg_lowest_type = type; 12307c478bd9Sstevel@tonic-gate list_insert_tail(&qp->msg_list, mp); 1231b2eb1770Sudpa /* 12322c5b6df1Sdv142724 * Get the proper receiver going. 1233b2eb1770Sudpa */ 12342c5b6df1Sdv142724 msg_wakeup_rdr(qp, &qp->msg_fnd_sndr, type); 12357c478bd9Sstevel@tonic-gate 12367c478bd9Sstevel@tonic-gate msgsnd_out: 1237eb9fe4caSDavid Valin /* 1238eb9fe4caSDavid Valin * We were woken up from the send wait list, but an 1239eb9fe4caSDavid Valin * an error occured on placing the message onto the 1240eb9fe4caSDavid Valin * msg queue. Given that, we need to do the wakeup 1241eb9fe4caSDavid Valin * dance again. 
1242eb9fe4caSDavid Valin */ 1243eb9fe4caSDavid Valin 1244eb9fe4caSDavid Valin if (wait_wakeup && error) { 1245eb9fe4caSDavid Valin msg_wakeup_senders(qp); 1246eb9fe4caSDavid Valin } 1247e50383f4Sdv142724 if (lock) 12487c478bd9Sstevel@tonic-gate ipc_rele(msq_svc, (kipc_perm_t *)qp); /* drops lock */ 12497c478bd9Sstevel@tonic-gate 12507c478bd9Sstevel@tonic-gate if (error) { 12517c478bd9Sstevel@tonic-gate if (mp) 12527c478bd9Sstevel@tonic-gate msg_rele(mp); 12537c478bd9Sstevel@tonic-gate return (set_errno(error)); 12547c478bd9Sstevel@tonic-gate } 12557c478bd9Sstevel@tonic-gate 12567c478bd9Sstevel@tonic-gate return (0); 12577c478bd9Sstevel@tonic-gate } 12587c478bd9Sstevel@tonic-gate 12592c5b6df1Sdv142724 static void 12602c5b6df1Sdv142724 msg_wakeup_rdr(kmsqid_t *qp, msg_select_t **flist, long type) 12612c5b6df1Sdv142724 { 12622c5b6df1Sdv142724 msg_select_t *walker = *flist; 12632c5b6df1Sdv142724 msgq_wakeup_t *wakeup; 1264e5994f96Sdv142724 uint_t msg_hash; 12652c5b6df1Sdv142724 12662c5b6df1Sdv142724 msg_hash = msg_type_hash(type); 12672c5b6df1Sdv142724 12682c5b6df1Sdv142724 do { 12692c5b6df1Sdv142724 wakeup = walker->selection(qp, msg_hash, type); 12702c5b6df1Sdv142724 walker = walker->next_selection; 12712c5b6df1Sdv142724 } while (!wakeup && walker != *flist); 12722c5b6df1Sdv142724 12732c5b6df1Sdv142724 *flist = (*flist)->next_selection; 12742c5b6df1Sdv142724 if (wakeup) { 12752c5b6df1Sdv142724 if (type) { 12762c5b6df1Sdv142724 wakeup->msgw_snd_wake = type; 12772c5b6df1Sdv142724 } 12782c5b6df1Sdv142724 cv_signal(&wakeup->msgw_wake_cv); 12792c5b6df1Sdv142724 } 12802c5b6df1Sdv142724 } 12812c5b6df1Sdv142724 1282e5994f96Sdv142724 static uint_t 12832c5b6df1Sdv142724 msg_type_hash(long msg_type) 12842c5b6df1Sdv142724 { 12852c5b6df1Sdv142724 if (msg_type < 0) { 1286e5994f96Sdv142724 long hash = -msg_type / MSG_NEG_INTERVAL; 12872c5b6df1Sdv142724 /* 12882c5b6df1Sdv142724 * Negative message types are hashed over an 12892c5b6df1Sdv142724 * interval. 
Any message type that hashes 12902c5b6df1Sdv142724 * beyond MSG_MAX_QNUM is automatically placed 12912c5b6df1Sdv142724 * in the last bucket. 12922c5b6df1Sdv142724 */ 1293e5994f96Sdv142724 if (hash > MSG_MAX_QNUM) 12942c5b6df1Sdv142724 hash = MSG_MAX_QNUM; 12952c5b6df1Sdv142724 return (hash); 12962c5b6df1Sdv142724 } 12972c5b6df1Sdv142724 12982c5b6df1Sdv142724 /* 12992c5b6df1Sdv142724 * 0 or positive message type. The first bucket is reserved for 13002c5b6df1Sdv142724 * message receivers of type 0, the other buckets we hash into. 13012c5b6df1Sdv142724 */ 1302e5994f96Sdv142724 if (msg_type) 1303e5994f96Sdv142724 return (1 + (msg_type % MSG_MAX_QNUM)); 13042c5b6df1Sdv142724 return (0); 13052c5b6df1Sdv142724 } 13062c5b6df1Sdv142724 13072c5b6df1Sdv142724 /* 13082c5b6df1Sdv142724 * Routines to see if we have a receiver of type 0 either blocked waiting 13092c5b6df1Sdv142724 * for a message. Simply return the first guy on the list. 13102c5b6df1Sdv142724 */ 13112c5b6df1Sdv142724 13122c5b6df1Sdv142724 static msgq_wakeup_t * 1313e5994f96Sdv142724 /* ARGSUSED */ 13142c5b6df1Sdv142724 msg_fnd_any_snd(kmsqid_t *qp, int msg_hash, long type) 13152c5b6df1Sdv142724 { 1316e5994f96Sdv142724 msgq_wakeup_t *walker; 1317e5994f96Sdv142724 1318e5994f96Sdv142724 walker = list_head(&qp->msg_wait_snd[0]); 1319e5994f96Sdv142724 1320e5994f96Sdv142724 if (walker) 1321e5994f96Sdv142724 list_remove(&qp->msg_wait_snd[0], walker); 1322e5994f96Sdv142724 return (walker); 13232c5b6df1Sdv142724 } 13242c5b6df1Sdv142724 13252c5b6df1Sdv142724 static msgq_wakeup_t * 1326e5994f96Sdv142724 /* ARGSUSED */ 13272c5b6df1Sdv142724 msg_fnd_any_rdr(kmsqid_t *qp, int msg_hash, long type) 13282c5b6df1Sdv142724 { 1329e5994f96Sdv142724 msgq_wakeup_t *walker; 1330e5994f96Sdv142724 1331e5994f96Sdv142724 walker = list_head(&qp->msg_cpy_block); 1332e5994f96Sdv142724 if (walker) 1333e5994f96Sdv142724 list_remove(&qp->msg_cpy_block, walker); 1334e5994f96Sdv142724 return (walker); 13352c5b6df1Sdv142724 } 13362c5b6df1Sdv142724 
13372c5b6df1Sdv142724 static msgq_wakeup_t * 13382c5b6df1Sdv142724 msg_fnd_spc_snd(kmsqid_t *qp, int msg_hash, long type) 13392c5b6df1Sdv142724 { 13402c5b6df1Sdv142724 msgq_wakeup_t *walker; 13412c5b6df1Sdv142724 13422c5b6df1Sdv142724 walker = list_head(&qp->msg_wait_snd[msg_hash]); 13432c5b6df1Sdv142724 1344e5994f96Sdv142724 while (walker && walker->msgw_type != type) 1345e5994f96Sdv142724 walker = list_next(&qp->msg_wait_snd[msg_hash], walker); 1346e5994f96Sdv142724 if (walker) 1347e5994f96Sdv142724 list_remove(&qp->msg_wait_snd[msg_hash], walker); 13482c5b6df1Sdv142724 return (walker); 13492c5b6df1Sdv142724 } 13502c5b6df1Sdv142724 1351e5994f96Sdv142724 /* ARGSUSED */ 13522c5b6df1Sdv142724 static msgq_wakeup_t * 13532c5b6df1Sdv142724 msg_fnd_neg_snd(kmsqid_t *qp, int msg_hash, long type) 13542c5b6df1Sdv142724 { 13552c5b6df1Sdv142724 msgq_wakeup_t *qptr; 13562c5b6df1Sdv142724 int count; 13572c5b6df1Sdv142724 int check_index; 13582c5b6df1Sdv142724 int neg_index; 13592c5b6df1Sdv142724 int nbuckets; 13602c5b6df1Sdv142724 13612c5b6df1Sdv142724 if (!qp->msg_ngt_cnt) { 13622c5b6df1Sdv142724 return (NULL); 13632c5b6df1Sdv142724 } 13642c5b6df1Sdv142724 neg_index = msg_type_hash(-type); 13652c5b6df1Sdv142724 13662c5b6df1Sdv142724 /* 13672c5b6df1Sdv142724 * Check for a match among the negative type queues. Any buckets 13682c5b6df1Sdv142724 * at neg_index or larger can match the type. Use the last send 13692c5b6df1Sdv142724 * time to randomize the starting bucket to prevent starvation. 13702c5b6df1Sdv142724 * Search all buckets from neg_index to MSG_MAX_QNUM, starting 13712c5b6df1Sdv142724 * from the random starting point, and wrapping around after 13722c5b6df1Sdv142724 * MSG_MAX_QNUM. 
13732c5b6df1Sdv142724 */ 13742c5b6df1Sdv142724 13752c5b6df1Sdv142724 nbuckets = MSG_MAX_QNUM - neg_index + 1; 13762c5b6df1Sdv142724 check_index = neg_index + (qp->msg_stime % nbuckets); 13772c5b6df1Sdv142724 13782c5b6df1Sdv142724 for (count = nbuckets; count > 0; count--) { 13792c5b6df1Sdv142724 qptr = list_head(&qp->msg_wait_snd_ngt[check_index]); 13802c5b6df1Sdv142724 while (qptr) { 13812c5b6df1Sdv142724 /* 13822c5b6df1Sdv142724 * The lowest hash bucket may actually contain 13832c5b6df1Sdv142724 * message types that are not valid for this 13842c5b6df1Sdv142724 * request. This can happen due to the fact that 13852c5b6df1Sdv142724 * the message buckets actually contain a consecutive 13862c5b6df1Sdv142724 * range of types. 13872c5b6df1Sdv142724 */ 13882c5b6df1Sdv142724 if (-qptr->msgw_type >= type) { 1389e5994f96Sdv142724 list_remove(&qp->msg_wait_snd_ngt[check_index], 1390e5994f96Sdv142724 qptr); 13912c5b6df1Sdv142724 return (qptr); 13922c5b6df1Sdv142724 } 1393e5994f96Sdv142724 qptr = list_next(&qp->msg_wait_snd_ngt[check_index], 1394e5994f96Sdv142724 qptr); 13952c5b6df1Sdv142724 } 13962c5b6df1Sdv142724 if (++check_index > MSG_MAX_QNUM) { 13972c5b6df1Sdv142724 check_index = neg_index; 13982c5b6df1Sdv142724 } 13992c5b6df1Sdv142724 } 14002c5b6df1Sdv142724 return (NULL); 14012c5b6df1Sdv142724 } 14022c5b6df1Sdv142724 14032c5b6df1Sdv142724 static int 14042c5b6df1Sdv142724 msg_rcvq_sleep(list_t *queue, msgq_wakeup_t *entry, kmutex_t **lock, 14052c5b6df1Sdv142724 kmsqid_t *qp) 14062c5b6df1Sdv142724 { 14072c5b6df1Sdv142724 int cvres; 14082c5b6df1Sdv142724 14092c5b6df1Sdv142724 cv_init(&entry->msgw_wake_cv, NULL, 0, NULL); 14102c5b6df1Sdv142724 14112c5b6df1Sdv142724 list_insert_tail(queue, entry); 14122c5b6df1Sdv142724 14132c5b6df1Sdv142724 qp->msg_rcv_cnt++; 14142c5b6df1Sdv142724 cvres = cv_wait_sig(&entry->msgw_wake_cv, *lock); 14152c5b6df1Sdv142724 *lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, *lock); 14162c5b6df1Sdv142724 qp->msg_rcv_cnt--; 1417e5994f96Sdv142724 
1418e5994f96Sdv142724 if (list_link_active(&entry->msgw_list)) { 14192c5b6df1Sdv142724 /* 1420e5994f96Sdv142724 * We woke up unexpectedly, remove ourself. 14212c5b6df1Sdv142724 */ 14222c5b6df1Sdv142724 list_remove(queue, entry); 1423e5994f96Sdv142724 } 14242c5b6df1Sdv142724 14252c5b6df1Sdv142724 return (cvres); 14262c5b6df1Sdv142724 } 14272c5b6df1Sdv142724 14282c5b6df1Sdv142724 static void 14292c5b6df1Sdv142724 msg_rcvq_wakeup_all(list_t *q_ptr) 14302c5b6df1Sdv142724 { 14312c5b6df1Sdv142724 msgq_wakeup_t *q_walk; 14322c5b6df1Sdv142724 1433e5994f96Sdv142724 while (q_walk = list_head(q_ptr)) { 1434e5994f96Sdv142724 list_remove(q_ptr, q_walk); 14352c5b6df1Sdv142724 cv_signal(&q_walk->msgw_wake_cv); 14362c5b6df1Sdv142724 } 14372c5b6df1Sdv142724 } 14382c5b6df1Sdv142724 14397c478bd9Sstevel@tonic-gate /* 14407c478bd9Sstevel@tonic-gate * msgsys - System entry point for msgctl, msgget, msgrcv, and msgsnd 14417c478bd9Sstevel@tonic-gate * system calls. 14427c478bd9Sstevel@tonic-gate */ 14437c478bd9Sstevel@tonic-gate static ssize_t 14447c478bd9Sstevel@tonic-gate msgsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3, 14457c478bd9Sstevel@tonic-gate uintptr_t a4, uintptr_t a5) 14467c478bd9Sstevel@tonic-gate { 14477c478bd9Sstevel@tonic-gate ssize_t error; 14487c478bd9Sstevel@tonic-gate 14497c478bd9Sstevel@tonic-gate switch (opcode) { 14507c478bd9Sstevel@tonic-gate case MSGGET: 14517c478bd9Sstevel@tonic-gate error = msgget((key_t)a1, (int)a2); 14527c478bd9Sstevel@tonic-gate break; 14537c478bd9Sstevel@tonic-gate case MSGCTL: 14547c478bd9Sstevel@tonic-gate error = msgctl((int)a1, (int)a2, (void *)a3); 14557c478bd9Sstevel@tonic-gate break; 14567c478bd9Sstevel@tonic-gate case MSGRCV: 14577c478bd9Sstevel@tonic-gate error = msgrcv((int)a1, (struct ipcmsgbuf *)a2, 14587c478bd9Sstevel@tonic-gate (size_t)a3, (long)a4, (int)a5); 14597c478bd9Sstevel@tonic-gate break; 14607c478bd9Sstevel@tonic-gate case MSGSND: 14617c478bd9Sstevel@tonic-gate error = msgsnd((int)a1, (struct ipcmsgbuf 
*)a2, 14627c478bd9Sstevel@tonic-gate (size_t)a3, (int)a4); 14637c478bd9Sstevel@tonic-gate break; 14647c478bd9Sstevel@tonic-gate case MSGIDS: 14657c478bd9Sstevel@tonic-gate error = msgids((int *)a1, (uint_t)a2, (uint_t *)a3); 14667c478bd9Sstevel@tonic-gate break; 14677c478bd9Sstevel@tonic-gate case MSGSNAP: 14687c478bd9Sstevel@tonic-gate error = msgsnap((int)a1, (caddr_t)a2, (size_t)a3, (long)a4); 14697c478bd9Sstevel@tonic-gate break; 14707c478bd9Sstevel@tonic-gate default: 14717c478bd9Sstevel@tonic-gate error = set_errno(EINVAL); 14727c478bd9Sstevel@tonic-gate break; 14737c478bd9Sstevel@tonic-gate } 14747c478bd9Sstevel@tonic-gate 14757c478bd9Sstevel@tonic-gate return (error); 14767c478bd9Sstevel@tonic-gate } 14777c478bd9Sstevel@tonic-gate 1478eb9fe4caSDavid Valin /* 1479eb9fe4caSDavid Valin * Determine if a writer who is waiting can process its message. If so 1480eb9fe4caSDavid Valin * wake it up. 1481eb9fe4caSDavid Valin */ 1482eb9fe4caSDavid Valin static void 1483eb9fe4caSDavid Valin msg_wakeup_senders(kmsqid_t *qp) 1484eb9fe4caSDavid Valin 1485eb9fe4caSDavid Valin { 1486eb9fe4caSDavid Valin struct msgq_wakeup *ptr, *optr; 1487eb9fe4caSDavid Valin size_t avail, smallest; 1488eb9fe4caSDavid Valin int msgs_out; 1489eb9fe4caSDavid Valin 1490eb9fe4caSDavid Valin /* 1491eb9fe4caSDavid Valin * Is there a writer waiting, and if so, can it be serviced? If 1492eb9fe4caSDavid Valin * not return back to the caller. 
1493eb9fe4caSDavid Valin */ 1494eb9fe4caSDavid Valin if (IPC_FREE(&qp->msg_perm) || qp->msg_qnum >= qp->msg_qmax) 1495eb9fe4caSDavid Valin return; 1496eb9fe4caSDavid Valin 1497eb9fe4caSDavid Valin avail = qp->msg_qbytes - qp->msg_cbytes; 1498eb9fe4caSDavid Valin if (avail < qp->msg_snd_smallest) 1499eb9fe4caSDavid Valin return; 1500eb9fe4caSDavid Valin 1501eb9fe4caSDavid Valin ptr = list_head(&qp->msg_wait_rcv); 1502eb9fe4caSDavid Valin if (ptr == NULL) { 1503eb9fe4caSDavid Valin qp->msg_snd_smallest = MSG_SMALL_INIT; 1504eb9fe4caSDavid Valin return; 1505eb9fe4caSDavid Valin } 1506eb9fe4caSDavid Valin optr = ptr; 1507eb9fe4caSDavid Valin 1508eb9fe4caSDavid Valin /* 1509eb9fe4caSDavid Valin * smallest: minimum message size of all queued writers 1510eb9fe4caSDavid Valin * 1511eb9fe4caSDavid Valin * avail: amount of space left on the msgq 1512eb9fe4caSDavid Valin * if all the writers we have woken up are successful. 1513eb9fe4caSDavid Valin * 1514eb9fe4caSDavid Valin * msgs_out: is the number of messages on the message queue if 1515eb9fe4caSDavid Valin * all the writers we have woken up are successful. 
1516eb9fe4caSDavid Valin */ 1517eb9fe4caSDavid Valin 1518eb9fe4caSDavid Valin smallest = MSG_SMALL_INIT; 1519eb9fe4caSDavid Valin msgs_out = qp->msg_qnum; 1520eb9fe4caSDavid Valin while (ptr) { 1521eb9fe4caSDavid Valin ptr = list_next(&qp->msg_wait_rcv, ptr); 1522eb9fe4caSDavid Valin if (optr->msgw_snd_size <= avail) { 1523eb9fe4caSDavid Valin list_remove(&qp->msg_wait_rcv, optr); 1524eb9fe4caSDavid Valin avail -= optr->msgw_snd_size; 1525eb9fe4caSDavid Valin cv_signal(&optr->msgw_wake_cv); 1526eb9fe4caSDavid Valin msgs_out++; 1527eb9fe4caSDavid Valin if (msgs_out == qp->msg_qmax || 1528eb9fe4caSDavid Valin avail < qp->msg_snd_smallest) 1529eb9fe4caSDavid Valin break; 1530eb9fe4caSDavid Valin } else { 1531eb9fe4caSDavid Valin if (smallest > optr->msgw_snd_size) 1532eb9fe4caSDavid Valin smallest = optr->msgw_snd_size; 1533eb9fe4caSDavid Valin } 1534eb9fe4caSDavid Valin optr = ptr; 1535eb9fe4caSDavid Valin } 1536eb9fe4caSDavid Valin 1537eb9fe4caSDavid Valin /* 1538eb9fe4caSDavid Valin * Reset the smallest message size if the entire list has been visited 1539eb9fe4caSDavid Valin */ 1540eb9fe4caSDavid Valin if (ptr == NULL && smallest != MSG_SMALL_INIT) 1541eb9fe4caSDavid Valin qp->msg_snd_smallest = smallest; 1542eb9fe4caSDavid Valin } 1543eb9fe4caSDavid Valin 15447c478bd9Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 15457c478bd9Sstevel@tonic-gate /* 15467c478bd9Sstevel@tonic-gate * msgsys32 - System entry point for msgctl, msgget, msgrcv, and msgsnd 15477c478bd9Sstevel@tonic-gate * system calls for 32-bit callers on LP64 kernel. 
15487c478bd9Sstevel@tonic-gate */ 15497c478bd9Sstevel@tonic-gate static ssize32_t 15507c478bd9Sstevel@tonic-gate msgsys32(int opcode, uint32_t a1, uint32_t a2, uint32_t a3, 15517c478bd9Sstevel@tonic-gate uint32_t a4, uint32_t a5) 15527c478bd9Sstevel@tonic-gate { 15537c478bd9Sstevel@tonic-gate ssize_t error; 15547c478bd9Sstevel@tonic-gate 15557c478bd9Sstevel@tonic-gate switch (opcode) { 15567c478bd9Sstevel@tonic-gate case MSGGET: 15577c478bd9Sstevel@tonic-gate error = msgget((key_t)a1, (int)a2); 15587c478bd9Sstevel@tonic-gate break; 15597c478bd9Sstevel@tonic-gate case MSGCTL: 15607c478bd9Sstevel@tonic-gate error = msgctl((int)a1, (int)a2, (void *)(uintptr_t)a3); 15617c478bd9Sstevel@tonic-gate break; 15627c478bd9Sstevel@tonic-gate case MSGRCV: 15637c478bd9Sstevel@tonic-gate error = msgrcv((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2, 15647c478bd9Sstevel@tonic-gate (size_t)a3, (long)(int32_t)a4, (int)a5); 15657c478bd9Sstevel@tonic-gate break; 15667c478bd9Sstevel@tonic-gate case MSGSND: 15677c478bd9Sstevel@tonic-gate error = msgsnd((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2, 15687c478bd9Sstevel@tonic-gate (size_t)(int32_t)a3, (int)a4); 15697c478bd9Sstevel@tonic-gate break; 15707c478bd9Sstevel@tonic-gate case MSGIDS: 15717c478bd9Sstevel@tonic-gate error = msgids((int *)(uintptr_t)a1, (uint_t)a2, 15727c478bd9Sstevel@tonic-gate (uint_t *)(uintptr_t)a3); 15737c478bd9Sstevel@tonic-gate break; 15747c478bd9Sstevel@tonic-gate case MSGSNAP: 15757c478bd9Sstevel@tonic-gate error = msgsnap((int)a1, (caddr_t)(uintptr_t)a2, (size_t)a3, 15767c478bd9Sstevel@tonic-gate (long)(int32_t)a4); 15777c478bd9Sstevel@tonic-gate break; 15787c478bd9Sstevel@tonic-gate default: 15797c478bd9Sstevel@tonic-gate error = set_errno(EINVAL); 15807c478bd9Sstevel@tonic-gate break; 15817c478bd9Sstevel@tonic-gate } 15827c478bd9Sstevel@tonic-gate 15837c478bd9Sstevel@tonic-gate return (error); 15847c478bd9Sstevel@tonic-gate } 15857c478bd9Sstevel@tonic-gate #endif /* SYSCALL32_IMPL */ 1586