xref: /titanic_51/usr/src/uts/common/os/ipc.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28*7c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
29*7c478bd9Sstevel@tonic-gate 
30*7c478bd9Sstevel@tonic-gate 
31*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
32*7c478bd9Sstevel@tonic-gate 
33*7c478bd9Sstevel@tonic-gate /*
34*7c478bd9Sstevel@tonic-gate  * Common Inter-Process Communication routines.
35*7c478bd9Sstevel@tonic-gate  *
36*7c478bd9Sstevel@tonic-gate  * Overview
37*7c478bd9Sstevel@tonic-gate  * --------
38*7c478bd9Sstevel@tonic-gate  *
39*7c478bd9Sstevel@tonic-gate  * The System V inter-process communication (IPC) facilities provide
40*7c478bd9Sstevel@tonic-gate  * three services, message queues, semaphore arrays, and shared memory
41*7c478bd9Sstevel@tonic-gate  * segments, which are managed using filesystem-like namespaces.
42*7c478bd9Sstevel@tonic-gate  * Unlike a filesystem, these namespaces aren't mounted and accessible
43*7c478bd9Sstevel@tonic-gate  * via a path -- a special API is used to interact with the different
44*7c478bd9Sstevel@tonic-gate  * facilities (nothing precludes a VFS-based interface, but the
45*7c478bd9Sstevel@tonic-gate  * standards require the special APIs).  Furthermore, these special
46*7c478bd9Sstevel@tonic-gate  * APIs don't use file descriptors, nor do they have an equivalent.
47*7c478bd9Sstevel@tonic-gate  * This means that every operation which acts on an object needs to
48*7c478bd9Sstevel@tonic-gate  * perform the equivalent of a lookup, which in turn means that every
49*7c478bd9Sstevel@tonic-gate  * operation can fail if the specified object doesn't exist in the
50*7c478bd9Sstevel@tonic-gate  * facility's namespace.
51*7c478bd9Sstevel@tonic-gate  *
52*7c478bd9Sstevel@tonic-gate  * Objects
53*7c478bd9Sstevel@tonic-gate  * -------
54*7c478bd9Sstevel@tonic-gate  *
55*7c478bd9Sstevel@tonic-gate  * Each object in a namespace has a unique ID, which is assigned by the
56*7c478bd9Sstevel@tonic-gate  * system and is used to identify the object when performing operations
57*7c478bd9Sstevel@tonic-gate  * on it.  An object can also have a key, which is selected by the user
58*7c478bd9Sstevel@tonic-gate  * at allocation time and is used as a primitive rendezvous mechanism.
59*7c478bd9Sstevel@tonic-gate  * An object without a key is said to have a "private" key.
60*7c478bd9Sstevel@tonic-gate  *
61*7c478bd9Sstevel@tonic-gate  * To perform an operation on an object given its key, one must first
62*7c478bd9Sstevel@tonic-gate  * perform a lookup and obtain its ID.  The ID is then used to identify
63*7c478bd9Sstevel@tonic-gate  * the object when performing the operation.  If the object has a
64*7c478bd9Sstevel@tonic-gate  * private key, the ID must be known or obtained by other means.
65*7c478bd9Sstevel@tonic-gate  *
66*7c478bd9Sstevel@tonic-gate  * Each object in the namespace has a creator uid and gid, as well as
67*7c478bd9Sstevel@tonic-gate  * an owner uid and gid.  Both are initialized with the ruid and rgid
68*7c478bd9Sstevel@tonic-gate  * of the process which created the object.  The creator or current
69*7c478bd9Sstevel@tonic-gate  * owner has the ability to change the owner of the object.
70*7c478bd9Sstevel@tonic-gate  *
71*7c478bd9Sstevel@tonic-gate  * Each object in the namespace has a set of file-like permissions,
72*7c478bd9Sstevel@tonic-gate  * which, in conjunction with the creator and owner uid and gid,
73*7c478bd9Sstevel@tonic-gate  * control read and write access to the object (execute is ignored).
74*7c478bd9Sstevel@tonic-gate  *
75*7c478bd9Sstevel@tonic-gate  * Each object also has a creator project, which is used to account for
76*7c478bd9Sstevel@tonic-gate  * its resource usage.
77*7c478bd9Sstevel@tonic-gate  *
78*7c478bd9Sstevel@tonic-gate  * Operations
79*7c478bd9Sstevel@tonic-gate  * ----------
80*7c478bd9Sstevel@tonic-gate  *
81*7c478bd9Sstevel@tonic-gate  * There are five operations which all three facilities have in
82*7c478bd9Sstevel@tonic-gate  * common: GET, SET, STAT, RMID, and IDS.
83*7c478bd9Sstevel@tonic-gate  *
84*7c478bd9Sstevel@tonic-gate  * GET, like open, is used to allocate a new object or obtain an
85*7c478bd9Sstevel@tonic-gate  * existing one (using its key).  It takes a key, a set of flags and
86*7c478bd9Sstevel@tonic-gate  * mode bits, and optionally facility-specific arguments.  If the key
87*7c478bd9Sstevel@tonic-gate  * is IPC_PRIVATE, a new object with the requested mode bits and
88*7c478bd9Sstevel@tonic-gate  * facility-specific attributes is created.  If the key isn't
89*7c478bd9Sstevel@tonic-gate  * IPC_PRIVATE, the GET will attempt to look up the specified key and
90*7c478bd9Sstevel@tonic-gate  * either return that or create a new key depending on the state of the
91*7c478bd9Sstevel@tonic-gate  * IPC_CREAT and IPC_EXCL flags, much like open.  If GET needs to
92*7c478bd9Sstevel@tonic-gate  * allocate an object, it can fail if there is insufficient space in
93*7c478bd9Sstevel@tonic-gate  * the namespace (the maximum number of ids for the facility has been
94*7c478bd9Sstevel@tonic-gate  * exceeded) or if the facility-specific initialization fails.  If GET
95*7c478bd9Sstevel@tonic-gate  * finds an object it can return, it can still fail if that object's
96*7c478bd9Sstevel@tonic-gate  * permissions or facility-specific attributes are less than those
97*7c478bd9Sstevel@tonic-gate  * requested.
98*7c478bd9Sstevel@tonic-gate  *
99*7c478bd9Sstevel@tonic-gate  * SET is used to adjust facility-specific parameters of an object, in
100*7c478bd9Sstevel@tonic-gate  * addition to the owner uid and gid, and mode bits.  It can fail if
101*7c478bd9Sstevel@tonic-gate  * the caller isn't the creator or owner.
102*7c478bd9Sstevel@tonic-gate  *
103*7c478bd9Sstevel@tonic-gate  * STAT is used to obtain information about an object including the
104*7c478bd9Sstevel@tonic-gate  * general object attributes described above as well as facility-specific
105*7c478bd9Sstevel@tonic-gate  * information.  It can fail if the caller doesn't have read
106*7c478bd9Sstevel@tonic-gate  * permission.
107*7c478bd9Sstevel@tonic-gate  *
108*7c478bd9Sstevel@tonic-gate  * RMID removes an object from the namespace.  Subsequent operations
109*7c478bd9Sstevel@tonic-gate  * using the object's ID or key will fail (until another object is
110*7c478bd9Sstevel@tonic-gate  * created with the same key or ID).  Since an RMID may be performed
111*7c478bd9Sstevel@tonic-gate  * asynchronously with other operations, it is possible that other
112*7c478bd9Sstevel@tonic-gate  * threads and/or processes will have references to the object.  While
113*7c478bd9Sstevel@tonic-gate  * a facility may have actions which need to be performed at RMID time,
114*7c478bd9Sstevel@tonic-gate  * only when all references are dropped can the object be destroyed.
115*7c478bd9Sstevel@tonic-gate  * RMID will fail if the caller isn't the creator or owner.
116*7c478bd9Sstevel@tonic-gate  *
117*7c478bd9Sstevel@tonic-gate  * IDS obtains a list of all IDs in a facility's namespace.  There are
118*7c478bd9Sstevel@tonic-gate  * no facility-specific behaviors of IDS.
119*7c478bd9Sstevel@tonic-gate  *
120*7c478bd9Sstevel@tonic-gate  * Design
121*7c478bd9Sstevel@tonic-gate  * ------
122*7c478bd9Sstevel@tonic-gate  *
123*7c478bd9Sstevel@tonic-gate  * Because some IPC facilities provide services whose operations must
124*7c478bd9Sstevel@tonic-gate  * scale, a mechanism which allows fast, concurrent access to
125*7c478bd9Sstevel@tonic-gate  * individual objects is needed.  Of primary importance is object
126*7c478bd9Sstevel@tonic-gate  * lookup based on ID (SET, STAT, others).  Allocation (GET),
127*7c478bd9Sstevel@tonic-gate  * deallocation (RMID), ID enumeration (IDS), and key lookups (GET) are
128*7c478bd9Sstevel@tonic-gate  * lesser concerns, but should be implemented in such a way that ID
129*7c478bd9Sstevel@tonic-gate  * lookup isn't affected (at least not in the common case).
130*7c478bd9Sstevel@tonic-gate  *
131*7c478bd9Sstevel@tonic-gate  * Starting from the bottom up, each object is represented by a
132*7c478bd9Sstevel@tonic-gate  * structure, the first member of which must be a kipc_perm_t.  The
133*7c478bd9Sstevel@tonic-gate  * kipc_perm_t contains the information described above in "Objects", a
134*7c478bd9Sstevel@tonic-gate  * reference count (since the object may continue to exist after it has
135*7c478bd9Sstevel@tonic-gate  * been removed from the namespace), as well as some additional
136*7c478bd9Sstevel@tonic-gate  * metadata used to manage data structure membership.  These objects
137*7c478bd9Sstevel@tonic-gate  * are dynamically allocated.
138*7c478bd9Sstevel@tonic-gate  *
139*7c478bd9Sstevel@tonic-gate  * Above the objects is a power-of-two sized table of ID slots.  Each
140*7c478bd9Sstevel@tonic-gate  * slot contains a pointer to an object, a sequence number, and a
141*7c478bd9Sstevel@tonic-gate  * lock.  An object's ID is a function of its slot's index in the table
142*7c478bd9Sstevel@tonic-gate  * and its slot's sequence number.  Every time a slot is released (via
143*7c478bd9Sstevel@tonic-gate  * RMID) its sequence number is increased.  Strictly speaking, the
144*7c478bd9Sstevel@tonic-gate  * sequence number is unnecessary.  However, checking the sequence
145*7c478bd9Sstevel@tonic-gate  * number after a lookup provides a certain degree of robustness
146*7c478bd9Sstevel@tonic-gate  * against the use of stale IDs (useful since nothing else does).  When
147*7c478bd9Sstevel@tonic-gate  * the table fills up, it is resized (see Locking, below).
148*7c478bd9Sstevel@tonic-gate  *
149*7c478bd9Sstevel@tonic-gate  * Of an ID's 31 bits (an ID is, as defined by the standards, a signed
150*7c478bd9Sstevel@tonic-gate  * int) the top IPC_SEQ_BITS are used for the sequence number with the
151*7c478bd9Sstevel@tonic-gate  * remainder holding the index into the table.  The size of the table
152*7c478bd9Sstevel@tonic-gate  * is therefore bounded at 2 ^ (31 - IPC_SEQ_BITS) slots.
153*7c478bd9Sstevel@tonic-gate  *
154*7c478bd9Sstevel@tonic-gate  * Managing this table is the ipc_service structure.  It contains a
155*7c478bd9Sstevel@tonic-gate  * pointer to the dynamically allocated ID table, a namespace-global
156*7c478bd9Sstevel@tonic-gate  * lock, an id_space for managing the free space in the table, and
157*7c478bd9Sstevel@tonic-gate  * sundry other metadata necessary for the maintenance of the
158*7c478bd9Sstevel@tonic-gate  * namespace.  An AVL tree of all keyed objects in the table (sorted by
159*7c478bd9Sstevel@tonic-gate  * key) is used for key lookups.  An unordered doubly linked list of
160*7c478bd9Sstevel@tonic-gate  * all objects in the namespace (keyed or not) is maintained to
161*7c478bd9Sstevel@tonic-gate  * facilitate ID enumeration.
162*7c478bd9Sstevel@tonic-gate  *
163*7c478bd9Sstevel@tonic-gate  * To help visualize these relationships, here's a picture of a
164*7c478bd9Sstevel@tonic-gate  * namespace with a table of size 8 containing three objects
165*7c478bd9Sstevel@tonic-gate  * (IPC_SEQ_BITS = 28):
166*7c478bd9Sstevel@tonic-gate  *
167*7c478bd9Sstevel@tonic-gate  *
168*7c478bd9Sstevel@tonic-gate  * +-ipc_service_t--+
169*7c478bd9Sstevel@tonic-gate  * | table          *---\
170*7c478bd9Sstevel@tonic-gate  * | keys           *---+----------------------\
171*7c478bd9Sstevel@tonic-gate  * | all ids        *--\|                      |
172*7c478bd9Sstevel@tonic-gate  * |                |  ||                      |
173*7c478bd9Sstevel@tonic-gate  * +----------------+  ||                      |
174*7c478bd9Sstevel@tonic-gate  *                     ||                      |
175*7c478bd9Sstevel@tonic-gate  * /-------------------/|                      |
176*7c478bd9Sstevel@tonic-gate  * |    /---------------/                      |
177*7c478bd9Sstevel@tonic-gate  * |    |                                      |
178*7c478bd9Sstevel@tonic-gate  * |    v                                      |
179*7c478bd9Sstevel@tonic-gate  * |  +-0------+-1------+-2------+-3------+-4--+---+-5------+-6------+-7------+
180*7c478bd9Sstevel@tonic-gate  * |  | Seq=3  |        |        | Seq=1  |    :   |        |        | Seq=6  |
181*7c478bd9Sstevel@tonic-gate  * |  |        |        |        |        |    :   |        |        |        |
182*7c478bd9Sstevel@tonic-gate  * |  +-*------+--------+--------+-*------+----+---+--------+--------+-*------+
183*7c478bd9Sstevel@tonic-gate  * |    |                          |           |                       |
184*7c478bd9Sstevel@tonic-gate  * |    |                      /---/           |      /----------------/
185*7c478bd9Sstevel@tonic-gate  * |    |                      |               |      |
186*7c478bd9Sstevel@tonic-gate  * |    v                      v               |      v
187*7c478bd9Sstevel@tonic-gate  * |  +-kipc_perm_t-+        +-kipc_perm_t-+   |    +-kipc_perm_t-+
188*7c478bd9Sstevel@tonic-gate  * |  | id=0x30     |        | id=0x13     |   |    | id=0x67     |
189*7c478bd9Sstevel@tonic-gate  * |  | key=0xfeed  |        | key=0xbeef  |   |    | key=0xcafe  |
190*7c478bd9Sstevel@tonic-gate  * \->| [list]      |<------>| [list]      |<------>| [list]      |
191*7c478bd9Sstevel@tonic-gate  * /->| [avl left]  x   /--->| [avl left]  x   \--->| [avl left]  *---\
192*7c478bd9Sstevel@tonic-gate  * |  | [avl right] x   |    | [avl right] x        | [avl right] *---+-\
193*7c478bd9Sstevel@tonic-gate  * |  |             |   |    |             |        |             |   | |
194*7c478bd9Sstevel@tonic-gate  * |  +-------------+   |    +-------------+        +-------------+   | |
195*7c478bd9Sstevel@tonic-gate  * |                    \---------------------------------------------/ |
196*7c478bd9Sstevel@tonic-gate  * \--------------------------------------------------------------------/
197*7c478bd9Sstevel@tonic-gate  *
198*7c478bd9Sstevel@tonic-gate  * Locking
199*7c478bd9Sstevel@tonic-gate  * -------
200*7c478bd9Sstevel@tonic-gate  *
201*7c478bd9Sstevel@tonic-gate  * There are three locks (or sets of locks) which are used to ensure
202*7c478bd9Sstevel@tonic-gate  * correctness: the slot locks, the namespace lock, and p_lock (needed
203*7c478bd9Sstevel@tonic-gate  * when checking resource controls).  Their ordering is
204*7c478bd9Sstevel@tonic-gate  *
205*7c478bd9Sstevel@tonic-gate  *   namespace lock -> slot lock 0 -> ... -> slot lock t -> p_lock
206*7c478bd9Sstevel@tonic-gate  *
207*7c478bd9Sstevel@tonic-gate  * Generally speaking, the namespace lock is used to protect allocation
208*7c478bd9Sstevel@tonic-gate  * and removal from the namespace, ID enumeration, and resizing the ID
209*7c478bd9Sstevel@tonic-gate  * table.  Specifically:
210*7c478bd9Sstevel@tonic-gate  *
211*7c478bd9Sstevel@tonic-gate  * - write access to all fields of the ipc_service structure
212*7c478bd9Sstevel@tonic-gate  * - read access to all variable fields of ipc_service except
213*7c478bd9Sstevel@tonic-gate  *   ipcs_tabsz (table size) and ipcs_table (the table pointer)
214*7c478bd9Sstevel@tonic-gate  * - read/write access to ipc_avl, ipc_list in visible objects'
215*7c478bd9Sstevel@tonic-gate  *   kipc_perm structures (i.e. objects which have been removed from
216*7c478bd9Sstevel@tonic-gate  *   the namespace don't have this restriction)
217*7c478bd9Sstevel@tonic-gate  * - write access to ipct_seq and ipct_data in the table entries
218*7c478bd9Sstevel@tonic-gate  *
219*7c478bd9Sstevel@tonic-gate  * A slot lock by itself is meaningless (except when resizing).  Of
220*7c478bd9Sstevel@tonic-gate  * greater interest conceptually is the notion of an ID lock -- a
221*7c478bd9Sstevel@tonic-gate  * "virtual lock" which refers to whichever slot lock an object's ID
222*7c478bd9Sstevel@tonic-gate  * currently hashes to.
223*7c478bd9Sstevel@tonic-gate  *
224*7c478bd9Sstevel@tonic-gate  * An ID lock protects all objects with that ID.  Normally there will
225*7c478bd9Sstevel@tonic-gate  * only be one such object: the one pointed to by the locked slot.
226*7c478bd9Sstevel@tonic-gate  * However, if an object is removed from the namespace but retains
227*7c478bd9Sstevel@tonic-gate  * references (e.g. an attached shared memory segment which has been
228*7c478bd9Sstevel@tonic-gate  * RMIDed), it continues to use the lock associated with its original
229*7c478bd9Sstevel@tonic-gate  * ID.  While this can result in increased contention, operations which
230*7c478bd9Sstevel@tonic-gate  * require taking the ID lock of removed objects are infrequent.
231*7c478bd9Sstevel@tonic-gate  *
232*7c478bd9Sstevel@tonic-gate  * Specifically, an ID lock protects the contents of an object's
233*7c478bd9Sstevel@tonic-gate  * structure, including the contents of the embedded kipc_perm
234*7c478bd9Sstevel@tonic-gate  * structure (but excluding those fields protected by the namespace
235*7c478bd9Sstevel@tonic-gate  * lock).  It also protects the ipct_seq and ipct_data fields in its
236*7c478bd9Sstevel@tonic-gate  * slot (it is really a slot lock, after all).
237*7c478bd9Sstevel@tonic-gate  *
238*7c478bd9Sstevel@tonic-gate  * Recall that the table is resizable.  To avoid requiring every ID
239*7c478bd9Sstevel@tonic-gate  * lookup to take a global lock, a scheme much like that employed for
240*7c478bd9Sstevel@tonic-gate  * file descriptors (see the comment above UF_ENTER in user.h) is
241*7c478bd9Sstevel@tonic-gate  * used.  Note that the sequence number and data pointer are protected
242*7c478bd9Sstevel@tonic-gate  * by both the namespace lock and their slot lock.  When the table is
243*7c478bd9Sstevel@tonic-gate  * resized, the following operations take place:
244*7c478bd9Sstevel@tonic-gate  *
245*7c478bd9Sstevel@tonic-gate  *   1) A new table is allocated.
246*7c478bd9Sstevel@tonic-gate  *   2) The global lock is taken.
247*7c478bd9Sstevel@tonic-gate  *   3) All old slots are locked, in order.
248*7c478bd9Sstevel@tonic-gate  *   4) The first half of the new slots are locked.
249*7c478bd9Sstevel@tonic-gate  *   5) All table entries are copied to the new table, and cleared from
250*7c478bd9Sstevel@tonic-gate  *	the old table.
251*7c478bd9Sstevel@tonic-gate  *   6) The ipc_service structure is updated to point to the new table.
252*7c478bd9Sstevel@tonic-gate  *   7) The ipc_service structure is updated with the new table size.
253*7c478bd9Sstevel@tonic-gate  *   8) All slot locks (old and new) are dropped.
254*7c478bd9Sstevel@tonic-gate  *
255*7c478bd9Sstevel@tonic-gate  * Because the slot locks are embedded in the table, ID lookups and
256*7c478bd9Sstevel@tonic-gate  * other operations which require taking a slot lock need to verify
257*7c478bd9Sstevel@tonic-gate  * that the lock taken wasn't part of a stale table.  This is
258*7c478bd9Sstevel@tonic-gate  * accomplished by checking the table size before and after
259*7c478bd9Sstevel@tonic-gate  * dereferencing the table pointer and taking the lock: if the size
260*7c478bd9Sstevel@tonic-gate  * changes, the lock must be dropped and reacquired.  It is this
261*7c478bd9Sstevel@tonic-gate  * additional work which distinguishes an ID lock from a slot lock.
262*7c478bd9Sstevel@tonic-gate  *
263*7c478bd9Sstevel@tonic-gate  * Because we can't guarantee that threads aren't accessing the old
264*7c478bd9Sstevel@tonic-gate  * tables' locks, they are never deallocated.  To prevent spurious
265*7c478bd9Sstevel@tonic-gate  * reports of memory leaks, a pointer to the discarded table is stored
266*7c478bd9Sstevel@tonic-gate  * in the new one in step 5.  (Theoretically ipcs_destroy will delete
267*7c478bd9Sstevel@tonic-gate  * the discarded tables, but it is only ever called from a failed _init
268*7c478bd9Sstevel@tonic-gate  * invocation; i.e. when there aren't any.)
269*7c478bd9Sstevel@tonic-gate  *
270*7c478bd9Sstevel@tonic-gate  * Interfaces
271*7c478bd9Sstevel@tonic-gate  * ----------
272*7c478bd9Sstevel@tonic-gate  *
273*7c478bd9Sstevel@tonic-gate  * The following interfaces are provided by the ipc module for use by
274*7c478bd9Sstevel@tonic-gate  * the individual IPC facilities:
275*7c478bd9Sstevel@tonic-gate  *
276*7c478bd9Sstevel@tonic-gate  * ipcperm_access
277*7c478bd9Sstevel@tonic-gate  *
278*7c478bd9Sstevel@tonic-gate  *   Given an object and a cred structure, determines if the requested
279*7c478bd9Sstevel@tonic-gate  *   access type is allowed.
280*7c478bd9Sstevel@tonic-gate  *
281*7c478bd9Sstevel@tonic-gate  * ipcperm_set, ipcperm_stat,
282*7c478bd9Sstevel@tonic-gate  * ipcperm_set64, ipcperm_stat64
283*7c478bd9Sstevel@tonic-gate  *
284*7c478bd9Sstevel@tonic-gate  *   Performs the common portion of a STAT or SET operation.  All
285*7c478bd9Sstevel@tonic-gate  *   (except stat and stat64) can fail, so they should be called before
286*7c478bd9Sstevel@tonic-gate  *   any facility-specific non-reversible changes are made to an
287*7c478bd9Sstevel@tonic-gate  *   object.  Similarly, the set operations have side effects, so they
288*7c478bd9Sstevel@tonic-gate  *   should only be called once the possibility of a facility-specific
289*7c478bd9Sstevel@tonic-gate  *   failure is eliminated.
290*7c478bd9Sstevel@tonic-gate  *
291*7c478bd9Sstevel@tonic-gate  * ipcs_create
292*7c478bd9Sstevel@tonic-gate  *
293*7c478bd9Sstevel@tonic-gate  *   Creates an IPC namespace for use by an IPC facility.
294*7c478bd9Sstevel@tonic-gate  *
295*7c478bd9Sstevel@tonic-gate  * ipcs_destroy
296*7c478bd9Sstevel@tonic-gate  *
297*7c478bd9Sstevel@tonic-gate  *   Destroys an IPC namespace.
298*7c478bd9Sstevel@tonic-gate  *
299*7c478bd9Sstevel@tonic-gate  * ipcs_lock, ipcs_unlock
300*7c478bd9Sstevel@tonic-gate  *
301*7c478bd9Sstevel@tonic-gate  *   Takes the namespace lock.  Ideally such access wouldn't be
302*7c478bd9Sstevel@tonic-gate  *   necessary, but there may be facility-specific data protected by
303*7c478bd9Sstevel@tonic-gate  *   this lock (e.g. project-wide resource consumption).
304*7c478bd9Sstevel@tonic-gate  *
305*7c478bd9Sstevel@tonic-gate  * ipc_lock
306*7c478bd9Sstevel@tonic-gate  *
307*7c478bd9Sstevel@tonic-gate  *   Takes the lock associated with an ID.  Can't fail.
308*7c478bd9Sstevel@tonic-gate  *
309*7c478bd9Sstevel@tonic-gate  * ipc_relock
310*7c478bd9Sstevel@tonic-gate  *
311*7c478bd9Sstevel@tonic-gate  *   Like ipc_lock, but takes a pointer to a held lock.  Drops the lock
312*7c478bd9Sstevel@tonic-gate  *   unless it is the one that would have been returned by ipc_lock.
313*7c478bd9Sstevel@tonic-gate  *   Used after calls to cv_wait.
314*7c478bd9Sstevel@tonic-gate  *
315*7c478bd9Sstevel@tonic-gate  * ipc_lookup
316*7c478bd9Sstevel@tonic-gate  *
317*7c478bd9Sstevel@tonic-gate  *   Performs an ID lookup, returns with the ID lock held.  Fails if
318*7c478bd9Sstevel@tonic-gate  *   the ID doesn't exist in the namespace.
319*7c478bd9Sstevel@tonic-gate  *
320*7c478bd9Sstevel@tonic-gate  * ipc_hold
321*7c478bd9Sstevel@tonic-gate  *
322*7c478bd9Sstevel@tonic-gate  *   Takes a reference on an object.
323*7c478bd9Sstevel@tonic-gate  *
324*7c478bd9Sstevel@tonic-gate  * ipc_rele
325*7c478bd9Sstevel@tonic-gate  *
326*7c478bd9Sstevel@tonic-gate  *   Releases a reference on an object, and drops the object's lock.
327*7c478bd9Sstevel@tonic-gate  *   Calls the object's destructor if last reference is being
328*7c478bd9Sstevel@tonic-gate  *   released.
329*7c478bd9Sstevel@tonic-gate  *
330*7c478bd9Sstevel@tonic-gate  * ipc_rele_locked
331*7c478bd9Sstevel@tonic-gate  *
332*7c478bd9Sstevel@tonic-gate  *   Releases a reference on an object.  Doesn't drop lock, and may
333*7c478bd9Sstevel@tonic-gate  *   only be called when there is more than one reference to the
334*7c478bd9Sstevel@tonic-gate  *   object.
335*7c478bd9Sstevel@tonic-gate  *
336*7c478bd9Sstevel@tonic-gate  * ipc_get, ipc_commit_begin, ipc_commit_end, ipc_cleanup
337*7c478bd9Sstevel@tonic-gate  *
338*7c478bd9Sstevel@tonic-gate  *   Components of a GET operation.  ipc_get performs a key lookup,
339*7c478bd9Sstevel@tonic-gate  *   allocating an object if the key isn't found (returning with the
340*7c478bd9Sstevel@tonic-gate  *   namespace lock and p_lock held), and returning the existing object
341*7c478bd9Sstevel@tonic-gate  *   if it is (with the object lock held).  ipc_get doesn't modify the
342*7c478bd9Sstevel@tonic-gate  *   namespace.
343*7c478bd9Sstevel@tonic-gate  *
344*7c478bd9Sstevel@tonic-gate  *   ipc_commit_begin begins the process of inserting an object
345*7c478bd9Sstevel@tonic-gate  *   allocated by ipc_get into the namespace, and can fail.  If
346*7c478bd9Sstevel@tonic-gate  *   successful, it returns with the namespace lock and p_lock held.
347*7c478bd9Sstevel@tonic-gate  *   ipc_commit_end completes the process of inserting an object into
348*7c478bd9Sstevel@tonic-gate  *   the namespace and can't fail.  The facility can call ipc_cleanup
349*7c478bd9Sstevel@tonic-gate  *   at any time following a successful ipc_get and before
350*7c478bd9Sstevel@tonic-gate  *   ipc_commit_end or a failed ipc_commit_begin to fail the
351*7c478bd9Sstevel@tonic-gate  *   allocation.  Pseudocode for the suggested GET implementation:
352*7c478bd9Sstevel@tonic-gate  *
353*7c478bd9Sstevel@tonic-gate  *   top:
354*7c478bd9Sstevel@tonic-gate  *
355*7c478bd9Sstevel@tonic-gate  *     ipc_get
356*7c478bd9Sstevel@tonic-gate  *
357*7c478bd9Sstevel@tonic-gate  *     if failure
358*7c478bd9Sstevel@tonic-gate  *       return
359*7c478bd9Sstevel@tonic-gate  *
360*7c478bd9Sstevel@tonic-gate  *     if found {
361*7c478bd9Sstevel@tonic-gate  *
362*7c478bd9Sstevel@tonic-gate  *	 if object meets criteria
363*7c478bd9Sstevel@tonic-gate  *	   unlock object and return success
364*7c478bd9Sstevel@tonic-gate  *       else
365*7c478bd9Sstevel@tonic-gate  *	   unlock object and return failure
366*7c478bd9Sstevel@tonic-gate  *
367*7c478bd9Sstevel@tonic-gate  *     } else {
368*7c478bd9Sstevel@tonic-gate  *
369*7c478bd9Sstevel@tonic-gate  *	 perform resource control tests
370*7c478bd9Sstevel@tonic-gate  *	 drop namespace lock, p_lock
371*7c478bd9Sstevel@tonic-gate  *	 if failure
372*7c478bd9Sstevel@tonic-gate  *	   ipc_cleanup
373*7c478bd9Sstevel@tonic-gate  *
374*7c478bd9Sstevel@tonic-gate  *       perform facility-specific initialization
375*7c478bd9Sstevel@tonic-gate  *	 if failure {
376*7c478bd9Sstevel@tonic-gate  *	   facility-specific cleanup
377*7c478bd9Sstevel@tonic-gate  *	   ipc_cleanup
378*7c478bd9Sstevel@tonic-gate  *       }
379*7c478bd9Sstevel@tonic-gate  *
380*7c478bd9Sstevel@tonic-gate  *	 ( At this point the object should be destructible using the
381*7c478bd9Sstevel@tonic-gate  *	   destructor given to ipcs_create )
382*7c478bd9Sstevel@tonic-gate  *
383*7c478bd9Sstevel@tonic-gate  *       ipc_commit_begin
384*7c478bd9Sstevel@tonic-gate  *	 if retry
385*7c478bd9Sstevel@tonic-gate  *	   goto top
386*7c478bd9Sstevel@tonic-gate  *       else if failure
387*7c478bd9Sstevel@tonic-gate  *         return
388*7c478bd9Sstevel@tonic-gate  *
389*7c478bd9Sstevel@tonic-gate  *       perform facility-specific resource control tests/allocations
390*7c478bd9Sstevel@tonic-gate  *	 if failure
391*7c478bd9Sstevel@tonic-gate  *	   ipc_cleanup
392*7c478bd9Sstevel@tonic-gate  *
393*7c478bd9Sstevel@tonic-gate  *	 ipc_commit_end
394*7c478bd9Sstevel@tonic-gate  *	 perform any infallible post-creation actions, unlock, and return
395*7c478bd9Sstevel@tonic-gate  *
396*7c478bd9Sstevel@tonic-gate  *     }
397*7c478bd9Sstevel@tonic-gate  *
398*7c478bd9Sstevel@tonic-gate  * ipc_rmid
399*7c478bd9Sstevel@tonic-gate  *
400*7c478bd9Sstevel@tonic-gate  *   Performs the common portion of an RMID operation -- looks up an ID,
401*7c478bd9Sstevel@tonic-gate  *   removes it, and calls a facility-specific function to do
402*7c478bd9Sstevel@tonic-gate  *   RMID-time cleanup on the private portions of the object.
403*7c478bd9Sstevel@tonic-gate  *
404*7c478bd9Sstevel@tonic-gate  * ipc_ids
405*7c478bd9Sstevel@tonic-gate  *
406*7c478bd9Sstevel@tonic-gate  *   Performs the common portion of an IDS operation.
407*7c478bd9Sstevel@tonic-gate  *
408*7c478bd9Sstevel@tonic-gate  */
409*7c478bd9Sstevel@tonic-gate 
410*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
411*7c478bd9Sstevel@tonic-gate #include <sys/param.h>
412*7c478bd9Sstevel@tonic-gate #include <sys/cred.h>
413*7c478bd9Sstevel@tonic-gate #include <sys/policy.h>
414*7c478bd9Sstevel@tonic-gate #include <sys/proc.h>
415*7c478bd9Sstevel@tonic-gate #include <sys/user.h>
416*7c478bd9Sstevel@tonic-gate #include <sys/ipc.h>
417*7c478bd9Sstevel@tonic-gate #include <sys/ipc_impl.h>
418*7c478bd9Sstevel@tonic-gate #include <sys/errno.h>
419*7c478bd9Sstevel@tonic-gate #include <sys/systm.h>
420*7c478bd9Sstevel@tonic-gate #include <sys/list.h>
421*7c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
422*7c478bd9Sstevel@tonic-gate #include <sys/zone.h>
423*7c478bd9Sstevel@tonic-gate #include <sys/task.h>
424*7c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
425*7c478bd9Sstevel@tonic-gate 
426*7c478bd9Sstevel@tonic-gate #include <c2/audit.h>
427*7c478bd9Sstevel@tonic-gate 
428*7c478bd9Sstevel@tonic-gate static struct modlmisc modlmisc = {
429*7c478bd9Sstevel@tonic-gate 	&mod_miscops,
430*7c478bd9Sstevel@tonic-gate 	"common ipc code",
431*7c478bd9Sstevel@tonic-gate };
432*7c478bd9Sstevel@tonic-gate 
433*7c478bd9Sstevel@tonic-gate static struct modlinkage modlinkage = {
434*7c478bd9Sstevel@tonic-gate 	MODREV_1, (void *)&modlmisc, NULL
435*7c478bd9Sstevel@tonic-gate };
436*7c478bd9Sstevel@tonic-gate 
437*7c478bd9Sstevel@tonic-gate 
438*7c478bd9Sstevel@tonic-gate int
439*7c478bd9Sstevel@tonic-gate _init(void)
440*7c478bd9Sstevel@tonic-gate {
441*7c478bd9Sstevel@tonic-gate 	return (mod_install(&modlinkage));
442*7c478bd9Sstevel@tonic-gate }
443*7c478bd9Sstevel@tonic-gate 
444*7c478bd9Sstevel@tonic-gate int
445*7c478bd9Sstevel@tonic-gate _fini(void)
446*7c478bd9Sstevel@tonic-gate {
447*7c478bd9Sstevel@tonic-gate 	return (mod_remove(&modlinkage));
448*7c478bd9Sstevel@tonic-gate }
449*7c478bd9Sstevel@tonic-gate 
450*7c478bd9Sstevel@tonic-gate int
451*7c478bd9Sstevel@tonic-gate _info(struct modinfo *modinfop)
452*7c478bd9Sstevel@tonic-gate {
453*7c478bd9Sstevel@tonic-gate 	return (mod_info(&modlinkage, modinfop));
454*7c478bd9Sstevel@tonic-gate }
455*7c478bd9Sstevel@tonic-gate 
456*7c478bd9Sstevel@tonic-gate 
457*7c478bd9Sstevel@tonic-gate /*
458*7c478bd9Sstevel@tonic-gate  * Check message, semaphore, or shared memory access permissions.
459*7c478bd9Sstevel@tonic-gate  *
460*7c478bd9Sstevel@tonic-gate  * This routine verifies the requested access permission for the current
461*7c478bd9Sstevel@tonic-gate  * process.  The zone ids are compared, and the appropriate bits are
462*7c478bd9Sstevel@tonic-gate  * checked corresponding to owner, group (including the list of
463*7c478bd9Sstevel@tonic-gate  * supplementary groups), or everyone.  Zero is returned on success.
464*7c478bd9Sstevel@tonic-gate  * On failure, the security policy is asked to check to override the
465*7c478bd9Sstevel@tonic-gate  * permissions check; the policy will either return 0 for access granted
466*7c478bd9Sstevel@tonic-gate  * or EACCES.
467*7c478bd9Sstevel@tonic-gate  *
468*7c478bd9Sstevel@tonic-gate  * Access to objects in other zones requires that the caller be in the
469*7c478bd9Sstevel@tonic-gate  * global zone and have the appropriate IPC_DAC_* privilege, regardless
470*7c478bd9Sstevel@tonic-gate  * of whether the uid or gid match those of the object.  Note that
471*7c478bd9Sstevel@tonic-gate  * cross-zone accesses will normally never get here since they'll
472*7c478bd9Sstevel@tonic-gate  * fail in ipc_lookup or ipc_get.
473*7c478bd9Sstevel@tonic-gate  *
474*7c478bd9Sstevel@tonic-gate  * The arguments must be set up as follows:
475*7c478bd9Sstevel@tonic-gate  * 	p - Pointer to permission structure to verify
476*7c478bd9Sstevel@tonic-gate  * 	mode - Desired access permissions
477*7c478bd9Sstevel@tonic-gate  */
478*7c478bd9Sstevel@tonic-gate int
479*7c478bd9Sstevel@tonic-gate ipcperm_access(kipc_perm_t *p, int mode, cred_t *cr)
480*7c478bd9Sstevel@tonic-gate {
481*7c478bd9Sstevel@tonic-gate 	int shifts = 0;
482*7c478bd9Sstevel@tonic-gate 	uid_t uid = crgetuid(cr);
483*7c478bd9Sstevel@tonic-gate 	zoneid_t zoneid = getzoneid();
484*7c478bd9Sstevel@tonic-gate 
485*7c478bd9Sstevel@tonic-gate 	if (p->ipc_zoneid == zoneid) {
486*7c478bd9Sstevel@tonic-gate 		if (uid != p->ipc_uid && uid != p->ipc_cuid) {
487*7c478bd9Sstevel@tonic-gate 			shifts += 3;
488*7c478bd9Sstevel@tonic-gate 			if (!groupmember(p->ipc_gid, cr) &&
489*7c478bd9Sstevel@tonic-gate 			    !groupmember(p->ipc_cgid, cr))
490*7c478bd9Sstevel@tonic-gate 				shifts += 3;
491*7c478bd9Sstevel@tonic-gate 		}
492*7c478bd9Sstevel@tonic-gate 
493*7c478bd9Sstevel@tonic-gate 		mode &= ~(p->ipc_mode << shifts);
494*7c478bd9Sstevel@tonic-gate 
495*7c478bd9Sstevel@tonic-gate 		if (mode == 0)
496*7c478bd9Sstevel@tonic-gate 			return (0);
497*7c478bd9Sstevel@tonic-gate 	} else if (zoneid != GLOBAL_ZONEID)
498*7c478bd9Sstevel@tonic-gate 		return (EACCES);
499*7c478bd9Sstevel@tonic-gate 
500*7c478bd9Sstevel@tonic-gate 	return (secpolicy_ipc_access(cr, p, mode));
501*7c478bd9Sstevel@tonic-gate }
502*7c478bd9Sstevel@tonic-gate 
503*7c478bd9Sstevel@tonic-gate /*
504*7c478bd9Sstevel@tonic-gate  * There are two versions of the ipcperm_set/stat functions:
505*7c478bd9Sstevel@tonic-gate  *   ipcperm_???        - for use with IPC_SET/STAT
506*7c478bd9Sstevel@tonic-gate  *   ipcperm_???_64     - for use with IPC_SET64/STAT64
507*7c478bd9Sstevel@tonic-gate  *
508*7c478bd9Sstevel@tonic-gate  * These functions encapsulate the common portions (copying, permission
509*7c478bd9Sstevel@tonic-gate  * checks, and auditing) of the set/stat operations.  All, except for
510*7c478bd9Sstevel@tonic-gate  * stat and stat_64 which are void, return 0 on success or a non-zero
511*7c478bd9Sstevel@tonic-gate  * errno value on error.
512*7c478bd9Sstevel@tonic-gate  */
513*7c478bd9Sstevel@tonic-gate 
514*7c478bd9Sstevel@tonic-gate int
515*7c478bd9Sstevel@tonic-gate ipcperm_set(ipc_service_t *service, struct cred *cr,
516*7c478bd9Sstevel@tonic-gate     kipc_perm_t *kperm, struct ipc_perm *perm, model_t model)
517*7c478bd9Sstevel@tonic-gate {
518*7c478bd9Sstevel@tonic-gate 	STRUCT_HANDLE(ipc_perm, lperm);
519*7c478bd9Sstevel@tonic-gate 	uid_t uid;
520*7c478bd9Sstevel@tonic-gate 	gid_t gid;
521*7c478bd9Sstevel@tonic-gate 	mode_t mode;
522*7c478bd9Sstevel@tonic-gate 
523*7c478bd9Sstevel@tonic-gate 	ASSERT(IPC_LOCKED(service, kperm));
524*7c478bd9Sstevel@tonic-gate 
525*7c478bd9Sstevel@tonic-gate 	STRUCT_SET_HANDLE(lperm, model, perm);
526*7c478bd9Sstevel@tonic-gate 	uid = STRUCT_FGET(lperm, uid);
527*7c478bd9Sstevel@tonic-gate 	gid = STRUCT_FGET(lperm, gid);
528*7c478bd9Sstevel@tonic-gate 	mode = STRUCT_FGET(lperm, mode);
529*7c478bd9Sstevel@tonic-gate 
530*7c478bd9Sstevel@tonic-gate 	if (secpolicy_ipc_owner(cr, kperm) != 0)
531*7c478bd9Sstevel@tonic-gate 		return (EPERM);
532*7c478bd9Sstevel@tonic-gate 
533*7c478bd9Sstevel@tonic-gate 	if ((uid < 0) || (uid > MAXUID) || (gid < 0) || (gid > MAXUID))
534*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
535*7c478bd9Sstevel@tonic-gate 
536*7c478bd9Sstevel@tonic-gate 	kperm->ipc_uid = uid;
537*7c478bd9Sstevel@tonic-gate 	kperm->ipc_gid = gid;
538*7c478bd9Sstevel@tonic-gate 	kperm->ipc_mode = (mode & 0777) | (kperm->ipc_mode & ~0777);
539*7c478bd9Sstevel@tonic-gate 
540*7c478bd9Sstevel@tonic-gate #ifdef C2_AUDIT
541*7c478bd9Sstevel@tonic-gate 	if (audit_active)
542*7c478bd9Sstevel@tonic-gate 		audit_ipcget(service->ipcs_atype, kperm);
543*7c478bd9Sstevel@tonic-gate #endif
544*7c478bd9Sstevel@tonic-gate 
545*7c478bd9Sstevel@tonic-gate 	return (0);
546*7c478bd9Sstevel@tonic-gate }
547*7c478bd9Sstevel@tonic-gate 
548*7c478bd9Sstevel@tonic-gate void
549*7c478bd9Sstevel@tonic-gate ipcperm_stat(struct ipc_perm *perm, kipc_perm_t *kperm, model_t model)
550*7c478bd9Sstevel@tonic-gate {
551*7c478bd9Sstevel@tonic-gate 	STRUCT_HANDLE(ipc_perm, lperm);
552*7c478bd9Sstevel@tonic-gate 
553*7c478bd9Sstevel@tonic-gate 	STRUCT_SET_HANDLE(lperm, model, perm);
554*7c478bd9Sstevel@tonic-gate 	STRUCT_FSET(lperm, uid, kperm->ipc_uid);
555*7c478bd9Sstevel@tonic-gate 	STRUCT_FSET(lperm, gid, kperm->ipc_gid);
556*7c478bd9Sstevel@tonic-gate 	STRUCT_FSET(lperm, cuid, kperm->ipc_cuid);
557*7c478bd9Sstevel@tonic-gate 	STRUCT_FSET(lperm, cgid, kperm->ipc_cgid);
558*7c478bd9Sstevel@tonic-gate 	STRUCT_FSET(lperm, mode, kperm->ipc_mode);
559*7c478bd9Sstevel@tonic-gate 	STRUCT_FSET(lperm, seq, 0);
560*7c478bd9Sstevel@tonic-gate 	STRUCT_FSET(lperm, key, kperm->ipc_key);
561*7c478bd9Sstevel@tonic-gate }
562*7c478bd9Sstevel@tonic-gate 
563*7c478bd9Sstevel@tonic-gate int
564*7c478bd9Sstevel@tonic-gate ipcperm_set64(ipc_service_t *service, struct cred *cr,
565*7c478bd9Sstevel@tonic-gate     kipc_perm_t *kperm, ipc_perm64_t *perm64)
566*7c478bd9Sstevel@tonic-gate {
567*7c478bd9Sstevel@tonic-gate 	ASSERT(IPC_LOCKED(service, kperm));
568*7c478bd9Sstevel@tonic-gate 
569*7c478bd9Sstevel@tonic-gate 	if (secpolicy_ipc_owner(cr, kperm) != 0)
570*7c478bd9Sstevel@tonic-gate 		return (EPERM);
571*7c478bd9Sstevel@tonic-gate 
572*7c478bd9Sstevel@tonic-gate 	if ((perm64->ipcx_uid < 0) || (perm64->ipcx_uid > MAXUID) ||
573*7c478bd9Sstevel@tonic-gate 	    (perm64->ipcx_gid < 0) || (perm64->ipcx_gid > MAXUID))
574*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
575*7c478bd9Sstevel@tonic-gate 
576*7c478bd9Sstevel@tonic-gate 	kperm->ipc_uid = perm64->ipcx_uid;
577*7c478bd9Sstevel@tonic-gate 	kperm->ipc_gid = perm64->ipcx_gid;
578*7c478bd9Sstevel@tonic-gate 	kperm->ipc_mode = (perm64->ipcx_mode & 0777) |
579*7c478bd9Sstevel@tonic-gate 	    (kperm->ipc_mode & ~0777);
580*7c478bd9Sstevel@tonic-gate 
581*7c478bd9Sstevel@tonic-gate #ifdef C2_AUDIT
582*7c478bd9Sstevel@tonic-gate 	if (audit_active)
583*7c478bd9Sstevel@tonic-gate 		audit_ipcget(service->ipcs_atype, kperm);
584*7c478bd9Sstevel@tonic-gate #endif
585*7c478bd9Sstevel@tonic-gate 
586*7c478bd9Sstevel@tonic-gate 	return (0);
587*7c478bd9Sstevel@tonic-gate }
588*7c478bd9Sstevel@tonic-gate 
589*7c478bd9Sstevel@tonic-gate void
590*7c478bd9Sstevel@tonic-gate ipcperm_stat64(ipc_perm64_t *perm64, kipc_perm_t *kperm)
591*7c478bd9Sstevel@tonic-gate {
592*7c478bd9Sstevel@tonic-gate 	perm64->ipcx_uid = kperm->ipc_uid;
593*7c478bd9Sstevel@tonic-gate 	perm64->ipcx_gid = kperm->ipc_gid;
594*7c478bd9Sstevel@tonic-gate 	perm64->ipcx_cuid = kperm->ipc_cuid;
595*7c478bd9Sstevel@tonic-gate 	perm64->ipcx_cgid = kperm->ipc_cgid;
596*7c478bd9Sstevel@tonic-gate 	perm64->ipcx_mode = kperm->ipc_mode;
597*7c478bd9Sstevel@tonic-gate 	perm64->ipcx_key = kperm->ipc_key;
598*7c478bd9Sstevel@tonic-gate 	perm64->ipcx_projid = kperm->ipc_proj->kpj_id;
599*7c478bd9Sstevel@tonic-gate 	perm64->ipcx_zoneid = kperm->ipc_zoneid;
600*7c478bd9Sstevel@tonic-gate }
601*7c478bd9Sstevel@tonic-gate 
602*7c478bd9Sstevel@tonic-gate 
603*7c478bd9Sstevel@tonic-gate /*
604*7c478bd9Sstevel@tonic-gate  * ipc key comparator.
605*7c478bd9Sstevel@tonic-gate  */
606*7c478bd9Sstevel@tonic-gate static int
607*7c478bd9Sstevel@tonic-gate ipc_key_compar(const void *a, const void *b)
608*7c478bd9Sstevel@tonic-gate {
609*7c478bd9Sstevel@tonic-gate 	kipc_perm_t *aperm = (kipc_perm_t *)a;
610*7c478bd9Sstevel@tonic-gate 	kipc_perm_t *bperm = (kipc_perm_t *)b;
611*7c478bd9Sstevel@tonic-gate 	int ak = aperm->ipc_key;
612*7c478bd9Sstevel@tonic-gate 	int bk = bperm->ipc_key;
613*7c478bd9Sstevel@tonic-gate 	zoneid_t az;
614*7c478bd9Sstevel@tonic-gate 	zoneid_t bz;
615*7c478bd9Sstevel@tonic-gate 
616*7c478bd9Sstevel@tonic-gate 	ASSERT(ak != IPC_PRIVATE);
617*7c478bd9Sstevel@tonic-gate 	ASSERT(bk != IPC_PRIVATE);
618*7c478bd9Sstevel@tonic-gate 
619*7c478bd9Sstevel@tonic-gate 	/*
620*7c478bd9Sstevel@tonic-gate 	 * Compare key first, then zoneid.  This optimizes performance for
621*7c478bd9Sstevel@tonic-gate 	 * systems with only one zone, since the zone checks will only be
622*7c478bd9Sstevel@tonic-gate 	 * made when the keys match.
623*7c478bd9Sstevel@tonic-gate 	 */
624*7c478bd9Sstevel@tonic-gate 	if (ak < bk)
625*7c478bd9Sstevel@tonic-gate 		return (-1);
626*7c478bd9Sstevel@tonic-gate 	if (ak > bk)
627*7c478bd9Sstevel@tonic-gate 		return (1);
628*7c478bd9Sstevel@tonic-gate 
629*7c478bd9Sstevel@tonic-gate 	/* keys match */
630*7c478bd9Sstevel@tonic-gate 	az = aperm->ipc_zoneid;
631*7c478bd9Sstevel@tonic-gate 	bz = bperm->ipc_zoneid;
632*7c478bd9Sstevel@tonic-gate 	if (az < bz)
633*7c478bd9Sstevel@tonic-gate 		return (-1);
634*7c478bd9Sstevel@tonic-gate 	if (az > bz)
635*7c478bd9Sstevel@tonic-gate 		return (1);
636*7c478bd9Sstevel@tonic-gate 	return (0);
637*7c478bd9Sstevel@tonic-gate }
638*7c478bd9Sstevel@tonic-gate 
639*7c478bd9Sstevel@tonic-gate /*
640*7c478bd9Sstevel@tonic-gate  * Create an ipc service.
641*7c478bd9Sstevel@tonic-gate  */
642*7c478bd9Sstevel@tonic-gate ipc_service_t *
643*7c478bd9Sstevel@tonic-gate ipcs_create(const char *name, rctl_hndl_t rctl, size_t size, ipc_func_t *dtor,
644*7c478bd9Sstevel@tonic-gate     ipc_func_t *rmid, int audit_type, size_t rctl_offset)
645*7c478bd9Sstevel@tonic-gate {
646*7c478bd9Sstevel@tonic-gate 	ipc_service_t *result;
647*7c478bd9Sstevel@tonic-gate 
648*7c478bd9Sstevel@tonic-gate 	result = kmem_alloc(sizeof (ipc_service_t), KM_SLEEP);
649*7c478bd9Sstevel@tonic-gate 
650*7c478bd9Sstevel@tonic-gate 	mutex_init(&result->ipcs_lock, NULL, MUTEX_ADAPTIVE, NULL);
651*7c478bd9Sstevel@tonic-gate 	result->ipcs_count = 0;
652*7c478bd9Sstevel@tonic-gate 	avl_create(&result->ipcs_keys, ipc_key_compar, size, 0);
653*7c478bd9Sstevel@tonic-gate 	result->ipcs_tabsz = IPC_IDS_MIN;
654*7c478bd9Sstevel@tonic-gate 	result->ipcs_table =
655*7c478bd9Sstevel@tonic-gate 	    kmem_zalloc(IPC_IDS_MIN * sizeof (ipc_slot_t), KM_SLEEP);
656*7c478bd9Sstevel@tonic-gate 	result->ipcs_ssize = size;
657*7c478bd9Sstevel@tonic-gate 	result->ipcs_ids = id_space_create(name, 0, IPC_IDS_MIN);
658*7c478bd9Sstevel@tonic-gate 	result->ipcs_dtor = dtor;
659*7c478bd9Sstevel@tonic-gate 	result->ipcs_rmid = rmid;
660*7c478bd9Sstevel@tonic-gate 	result->ipcs_rctl = rctl;
661*7c478bd9Sstevel@tonic-gate 	result->ipcs_atype = audit_type;
662*7c478bd9Sstevel@tonic-gate 	ASSERT(rctl_offset < sizeof (kproject_data_t));
663*7c478bd9Sstevel@tonic-gate 	result->ipcs_rctlofs = rctl_offset;
664*7c478bd9Sstevel@tonic-gate 	list_create(&result->ipcs_usedids, sizeof (kipc_perm_t),
665*7c478bd9Sstevel@tonic-gate 	    offsetof(kipc_perm_t, ipc_list));
666*7c478bd9Sstevel@tonic-gate 
667*7c478bd9Sstevel@tonic-gate 	return (result);
668*7c478bd9Sstevel@tonic-gate }
669*7c478bd9Sstevel@tonic-gate 
670*7c478bd9Sstevel@tonic-gate /*
671*7c478bd9Sstevel@tonic-gate  * Destroy an ipc service.
672*7c478bd9Sstevel@tonic-gate  */
673*7c478bd9Sstevel@tonic-gate void
674*7c478bd9Sstevel@tonic-gate ipcs_destroy(ipc_service_t *service)
675*7c478bd9Sstevel@tonic-gate {
676*7c478bd9Sstevel@tonic-gate 	ipc_slot_t *slot, *next;
677*7c478bd9Sstevel@tonic-gate 
678*7c478bd9Sstevel@tonic-gate 	mutex_enter(&service->ipcs_lock);
679*7c478bd9Sstevel@tonic-gate 
680*7c478bd9Sstevel@tonic-gate 	ASSERT(service->ipcs_count == 0);
681*7c478bd9Sstevel@tonic-gate 	avl_destroy(&service->ipcs_keys);
682*7c478bd9Sstevel@tonic-gate 	list_destroy(&service->ipcs_usedids);
683*7c478bd9Sstevel@tonic-gate 	id_space_destroy(service->ipcs_ids);
684*7c478bd9Sstevel@tonic-gate 
685*7c478bd9Sstevel@tonic-gate 	for (slot = service->ipcs_table; slot; slot = next) {
686*7c478bd9Sstevel@tonic-gate 		next = slot[0].ipct_chain;
687*7c478bd9Sstevel@tonic-gate 		kmem_free(slot, service->ipcs_tabsz * sizeof (ipc_slot_t));
688*7c478bd9Sstevel@tonic-gate 		service->ipcs_tabsz >>= 1;
689*7c478bd9Sstevel@tonic-gate 	}
690*7c478bd9Sstevel@tonic-gate 
691*7c478bd9Sstevel@tonic-gate 	mutex_destroy(&service->ipcs_lock);
692*7c478bd9Sstevel@tonic-gate 	kmem_free(service, sizeof (ipc_service_t));
693*7c478bd9Sstevel@tonic-gate }
694*7c478bd9Sstevel@tonic-gate 
695*7c478bd9Sstevel@tonic-gate /*
696*7c478bd9Sstevel@tonic-gate  * Takes the service lock.
697*7c478bd9Sstevel@tonic-gate  */
698*7c478bd9Sstevel@tonic-gate void
699*7c478bd9Sstevel@tonic-gate ipcs_lock(ipc_service_t *service)
700*7c478bd9Sstevel@tonic-gate {
701*7c478bd9Sstevel@tonic-gate 	mutex_enter(&service->ipcs_lock);
702*7c478bd9Sstevel@tonic-gate }
703*7c478bd9Sstevel@tonic-gate 
704*7c478bd9Sstevel@tonic-gate /*
705*7c478bd9Sstevel@tonic-gate  * Releases the service lock.
706*7c478bd9Sstevel@tonic-gate  */
707*7c478bd9Sstevel@tonic-gate void
708*7c478bd9Sstevel@tonic-gate ipcs_unlock(ipc_service_t *service)
709*7c478bd9Sstevel@tonic-gate {
710*7c478bd9Sstevel@tonic-gate 	mutex_exit(&service->ipcs_lock);
711*7c478bd9Sstevel@tonic-gate }
712*7c478bd9Sstevel@tonic-gate 
713*7c478bd9Sstevel@tonic-gate 
714*7c478bd9Sstevel@tonic-gate /*
715*7c478bd9Sstevel@tonic-gate  * Locks the specified ID.  Returns the ID's ID table index.
716*7c478bd9Sstevel@tonic-gate  */
717*7c478bd9Sstevel@tonic-gate static int
718*7c478bd9Sstevel@tonic-gate ipc_lock_internal(ipc_service_t *service, uint_t id)
719*7c478bd9Sstevel@tonic-gate {
720*7c478bd9Sstevel@tonic-gate 	uint_t	tabsz;
721*7c478bd9Sstevel@tonic-gate 	uint_t	index;
722*7c478bd9Sstevel@tonic-gate 	kmutex_t *mutex;
723*7c478bd9Sstevel@tonic-gate 
724*7c478bd9Sstevel@tonic-gate 	for (;;) {
725*7c478bd9Sstevel@tonic-gate 		tabsz = service->ipcs_tabsz;
726*7c478bd9Sstevel@tonic-gate 		membar_consumer();
727*7c478bd9Sstevel@tonic-gate 		index = id & (tabsz - 1);
728*7c478bd9Sstevel@tonic-gate 		mutex = &service->ipcs_table[index].ipct_lock;
729*7c478bd9Sstevel@tonic-gate 		mutex_enter(mutex);
730*7c478bd9Sstevel@tonic-gate 		if (tabsz == service->ipcs_tabsz)
731*7c478bd9Sstevel@tonic-gate 			break;
732*7c478bd9Sstevel@tonic-gate 		mutex_exit(mutex);
733*7c478bd9Sstevel@tonic-gate 	}
734*7c478bd9Sstevel@tonic-gate 
735*7c478bd9Sstevel@tonic-gate 	return (index);
736*7c478bd9Sstevel@tonic-gate }
737*7c478bd9Sstevel@tonic-gate 
738*7c478bd9Sstevel@tonic-gate /*
739*7c478bd9Sstevel@tonic-gate  * Locks the specified ID.  Returns a pointer to the ID's lock.
740*7c478bd9Sstevel@tonic-gate  */
741*7c478bd9Sstevel@tonic-gate kmutex_t *
742*7c478bd9Sstevel@tonic-gate ipc_lock(ipc_service_t *service, int id)
743*7c478bd9Sstevel@tonic-gate {
744*7c478bd9Sstevel@tonic-gate 	uint_t index;
745*7c478bd9Sstevel@tonic-gate 
746*7c478bd9Sstevel@tonic-gate 	/*
747*7c478bd9Sstevel@tonic-gate 	 * These assertions don't reflect requirements of the code
748*7c478bd9Sstevel@tonic-gate 	 * which follows, but they should never fail nonetheless.
749*7c478bd9Sstevel@tonic-gate 	 */
750*7c478bd9Sstevel@tonic-gate 	ASSERT(id >= 0);
751*7c478bd9Sstevel@tonic-gate 	ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
752*7c478bd9Sstevel@tonic-gate 	index = ipc_lock_internal(service, id);
753*7c478bd9Sstevel@tonic-gate 
754*7c478bd9Sstevel@tonic-gate 	return (&service->ipcs_table[index].ipct_lock);
755*7c478bd9Sstevel@tonic-gate }
756*7c478bd9Sstevel@tonic-gate 
757*7c478bd9Sstevel@tonic-gate /*
758*7c478bd9Sstevel@tonic-gate  * Checks to see if the held lock provided is the current lock for the
759*7c478bd9Sstevel@tonic-gate  * specified id.  If so, we return it instead of dropping it and
760*7c478bd9Sstevel@tonic-gate  * returning the result of ipc_lock.  This is intended to speed up cv
761*7c478bd9Sstevel@tonic-gate  * wakeups where we are left holding a lock which could be stale, but
762*7c478bd9Sstevel@tonic-gate  * probably isn't.
763*7c478bd9Sstevel@tonic-gate  */
764*7c478bd9Sstevel@tonic-gate kmutex_t *
765*7c478bd9Sstevel@tonic-gate ipc_relock(ipc_service_t *service, int id, kmutex_t *lock)
766*7c478bd9Sstevel@tonic-gate {
767*7c478bd9Sstevel@tonic-gate 	ASSERT(id >= 0);
768*7c478bd9Sstevel@tonic-gate 	ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
769*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(lock));
770*7c478bd9Sstevel@tonic-gate 
771*7c478bd9Sstevel@tonic-gate 	if (&service->ipcs_table[IPC_INDEX(id)].ipct_lock == lock)
772*7c478bd9Sstevel@tonic-gate 		return (lock);
773*7c478bd9Sstevel@tonic-gate 
774*7c478bd9Sstevel@tonic-gate 	mutex_exit(lock);
775*7c478bd9Sstevel@tonic-gate 	return (ipc_lock(service, id));
776*7c478bd9Sstevel@tonic-gate }
777*7c478bd9Sstevel@tonic-gate 
778*7c478bd9Sstevel@tonic-gate /*
779*7c478bd9Sstevel@tonic-gate  * Performs an ID lookup.  If the ID doesn't exist or has been removed,
780*7c478bd9Sstevel@tonic-gate  * or isn't visible to the caller (because of zones), NULL is returned.
781*7c478bd9Sstevel@tonic-gate  * Otherwise, a pointer to the ID's perm structure and held ID lock are
782*7c478bd9Sstevel@tonic-gate  * returned.
783*7c478bd9Sstevel@tonic-gate  */
784*7c478bd9Sstevel@tonic-gate kmutex_t *
785*7c478bd9Sstevel@tonic-gate ipc_lookup(ipc_service_t *service, int id, kipc_perm_t **perm)
786*7c478bd9Sstevel@tonic-gate {
787*7c478bd9Sstevel@tonic-gate 	kipc_perm_t *result;
788*7c478bd9Sstevel@tonic-gate 	uint_t index;
789*7c478bd9Sstevel@tonic-gate 
790*7c478bd9Sstevel@tonic-gate 	/*
791*7c478bd9Sstevel@tonic-gate 	 * There is no need to check to see if id is in-range (i.e.
792*7c478bd9Sstevel@tonic-gate 	 * positive and fits into the table).  If it is out-of-range,
793*7c478bd9Sstevel@tonic-gate 	 * the id simply won't match the object's.
794*7c478bd9Sstevel@tonic-gate 	 */
795*7c478bd9Sstevel@tonic-gate 
796*7c478bd9Sstevel@tonic-gate 	index = ipc_lock_internal(service, id);
797*7c478bd9Sstevel@tonic-gate 	result = service->ipcs_table[index].ipct_data;
798*7c478bd9Sstevel@tonic-gate 	if (result == NULL || result->ipc_id != (uint_t)id ||
799*7c478bd9Sstevel@tonic-gate 	    !HASZONEACCESS(curproc, result->ipc_zoneid)) {
800*7c478bd9Sstevel@tonic-gate 		mutex_exit(&service->ipcs_table[index].ipct_lock);
801*7c478bd9Sstevel@tonic-gate 		return (NULL);
802*7c478bd9Sstevel@tonic-gate 	}
803*7c478bd9Sstevel@tonic-gate 
804*7c478bd9Sstevel@tonic-gate 	ASSERT(IPC_SEQ(id) == service->ipcs_table[index].ipct_seq);
805*7c478bd9Sstevel@tonic-gate 
806*7c478bd9Sstevel@tonic-gate 	*perm = result;
807*7c478bd9Sstevel@tonic-gate #ifdef C2_AUDIT
808*7c478bd9Sstevel@tonic-gate 	if (audit_active)
809*7c478bd9Sstevel@tonic-gate 		audit_ipc(service->ipcs_atype, id, result);
810*7c478bd9Sstevel@tonic-gate #endif
811*7c478bd9Sstevel@tonic-gate 
812*7c478bd9Sstevel@tonic-gate 	return (&service->ipcs_table[index].ipct_lock);
813*7c478bd9Sstevel@tonic-gate }
814*7c478bd9Sstevel@tonic-gate 
815*7c478bd9Sstevel@tonic-gate /*
816*7c478bd9Sstevel@tonic-gate  * Increase the reference count on an ID.
817*7c478bd9Sstevel@tonic-gate  */
818*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
819*7c478bd9Sstevel@tonic-gate void
820*7c478bd9Sstevel@tonic-gate ipc_hold(ipc_service_t *s, kipc_perm_t *perm)
821*7c478bd9Sstevel@tonic-gate {
822*7c478bd9Sstevel@tonic-gate 	ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
823*7c478bd9Sstevel@tonic-gate 	ASSERT(IPC_LOCKED(s, perm));
824*7c478bd9Sstevel@tonic-gate 	perm->ipc_ref++;
825*7c478bd9Sstevel@tonic-gate }
826*7c478bd9Sstevel@tonic-gate 
827*7c478bd9Sstevel@tonic-gate /*
828*7c478bd9Sstevel@tonic-gate  * Decrease the reference count on an ID and drops the ID's lock.
829*7c478bd9Sstevel@tonic-gate  * Destroys the ID if the new reference count is zero.
830*7c478bd9Sstevel@tonic-gate  */
831*7c478bd9Sstevel@tonic-gate void
832*7c478bd9Sstevel@tonic-gate ipc_rele(ipc_service_t *s, kipc_perm_t *perm)
833*7c478bd9Sstevel@tonic-gate {
834*7c478bd9Sstevel@tonic-gate 	int nref;
835*7c478bd9Sstevel@tonic-gate 
836*7c478bd9Sstevel@tonic-gate 	ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
837*7c478bd9Sstevel@tonic-gate 	ASSERT(IPC_LOCKED(s, perm));
838*7c478bd9Sstevel@tonic-gate 	ASSERT(perm->ipc_ref > 0);
839*7c478bd9Sstevel@tonic-gate 
840*7c478bd9Sstevel@tonic-gate 	nref = --perm->ipc_ref;
841*7c478bd9Sstevel@tonic-gate 	mutex_exit(&s->ipcs_table[IPC_INDEX(perm->ipc_id)].ipct_lock);
842*7c478bd9Sstevel@tonic-gate 
843*7c478bd9Sstevel@tonic-gate 	if (nref == 0) {
844*7c478bd9Sstevel@tonic-gate 		ASSERT(IPC_FREE(perm));		/* ipc_rmid clears IPC_ALLOC */
845*7c478bd9Sstevel@tonic-gate 		s->ipcs_dtor(perm);
846*7c478bd9Sstevel@tonic-gate 		project_rele(perm->ipc_proj);
847*7c478bd9Sstevel@tonic-gate 		kmem_free(perm, s->ipcs_ssize);
848*7c478bd9Sstevel@tonic-gate 	}
849*7c478bd9Sstevel@tonic-gate }
850*7c478bd9Sstevel@tonic-gate 
851*7c478bd9Sstevel@tonic-gate /*
852*7c478bd9Sstevel@tonic-gate  * Decrease the reference count on an ID, but don't drop the ID lock.
853*7c478bd9Sstevel@tonic-gate  * Used in cases where one thread needs to remove many references (on
854*7c478bd9Sstevel@tonic-gate  * behalf of other parties).
855*7c478bd9Sstevel@tonic-gate  */
856*7c478bd9Sstevel@tonic-gate void
857*7c478bd9Sstevel@tonic-gate ipc_rele_locked(ipc_service_t *s, kipc_perm_t *perm)
858*7c478bd9Sstevel@tonic-gate {
859*7c478bd9Sstevel@tonic-gate 	ASSERT(perm->ipc_ref > 1);
860*7c478bd9Sstevel@tonic-gate 	ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
861*7c478bd9Sstevel@tonic-gate 	ASSERT(IPC_LOCKED(s, perm));
862*7c478bd9Sstevel@tonic-gate 
863*7c478bd9Sstevel@tonic-gate 	perm->ipc_ref--;
864*7c478bd9Sstevel@tonic-gate }
865*7c478bd9Sstevel@tonic-gate 
866*7c478bd9Sstevel@tonic-gate 
867*7c478bd9Sstevel@tonic-gate /*
868*7c478bd9Sstevel@tonic-gate  * Internal function to grow the service ID table.
869*7c478bd9Sstevel@tonic-gate  */
870*7c478bd9Sstevel@tonic-gate static int
871*7c478bd9Sstevel@tonic-gate ipc_grow(ipc_service_t *service)
872*7c478bd9Sstevel@tonic-gate {
873*7c478bd9Sstevel@tonic-gate 	ipc_slot_t *new, *old;
874*7c478bd9Sstevel@tonic-gate 	int i, oldsize, newsize;
875*7c478bd9Sstevel@tonic-gate 
876*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&service->ipcs_lock));
877*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
878*7c478bd9Sstevel@tonic-gate 
879*7c478bd9Sstevel@tonic-gate 	if (service->ipcs_tabsz == IPC_IDS_MAX)
880*7c478bd9Sstevel@tonic-gate 		return (ENOSPC);
881*7c478bd9Sstevel@tonic-gate 
882*7c478bd9Sstevel@tonic-gate 	oldsize = service->ipcs_tabsz;
883*7c478bd9Sstevel@tonic-gate 	newsize = oldsize << 1;
884*7c478bd9Sstevel@tonic-gate 	new = kmem_zalloc(newsize * sizeof (ipc_slot_t), KM_NOSLEEP);
885*7c478bd9Sstevel@tonic-gate 	if (new == NULL)
886*7c478bd9Sstevel@tonic-gate 		return (ENOSPC);
887*7c478bd9Sstevel@tonic-gate 
888*7c478bd9Sstevel@tonic-gate 	old = service->ipcs_table;
889*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < oldsize; i++) {
890*7c478bd9Sstevel@tonic-gate 		mutex_enter(&old[i].ipct_lock);
891*7c478bd9Sstevel@tonic-gate 		mutex_enter(&new[i].ipct_lock);
892*7c478bd9Sstevel@tonic-gate 
893*7c478bd9Sstevel@tonic-gate 		new[i].ipct_seq = old[i].ipct_seq;
894*7c478bd9Sstevel@tonic-gate 		new[i].ipct_data = old[i].ipct_data;
895*7c478bd9Sstevel@tonic-gate 		old[i].ipct_data = NULL;
896*7c478bd9Sstevel@tonic-gate 	}
897*7c478bd9Sstevel@tonic-gate 
898*7c478bd9Sstevel@tonic-gate 	new[0].ipct_chain = old;
899*7c478bd9Sstevel@tonic-gate 	service->ipcs_table = new;
900*7c478bd9Sstevel@tonic-gate 	membar_producer();
901*7c478bd9Sstevel@tonic-gate 	service->ipcs_tabsz = newsize;
902*7c478bd9Sstevel@tonic-gate 
903*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < oldsize; i++) {
904*7c478bd9Sstevel@tonic-gate 		mutex_exit(&old[i].ipct_lock);
905*7c478bd9Sstevel@tonic-gate 		mutex_exit(&new[i].ipct_lock);
906*7c478bd9Sstevel@tonic-gate 	}
907*7c478bd9Sstevel@tonic-gate 
908*7c478bd9Sstevel@tonic-gate 	id_space_extend(service->ipcs_ids, oldsize, service->ipcs_tabsz);
909*7c478bd9Sstevel@tonic-gate 
910*7c478bd9Sstevel@tonic-gate 	return (0);
911*7c478bd9Sstevel@tonic-gate }
912*7c478bd9Sstevel@tonic-gate 
913*7c478bd9Sstevel@tonic-gate 
914*7c478bd9Sstevel@tonic-gate static int
915*7c478bd9Sstevel@tonic-gate ipc_keylookup(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp)
916*7c478bd9Sstevel@tonic-gate {
917*7c478bd9Sstevel@tonic-gate 	kipc_perm_t *perm = NULL;
918*7c478bd9Sstevel@tonic-gate 	avl_index_t where;
919*7c478bd9Sstevel@tonic-gate 	kipc_perm_t template;
920*7c478bd9Sstevel@tonic-gate 
921*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&service->ipcs_lock));
922*7c478bd9Sstevel@tonic-gate 
923*7c478bd9Sstevel@tonic-gate 	template.ipc_key = key;
924*7c478bd9Sstevel@tonic-gate 	template.ipc_zoneid = getzoneid();
925*7c478bd9Sstevel@tonic-gate 	if (perm = avl_find(&service->ipcs_keys, &template, &where)) {
926*7c478bd9Sstevel@tonic-gate 		ASSERT(!IPC_FREE(perm));
927*7c478bd9Sstevel@tonic-gate 		if ((flag & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
928*7c478bd9Sstevel@tonic-gate 			return (EEXIST);
929*7c478bd9Sstevel@tonic-gate 		if ((flag & 0777) & ~perm->ipc_mode) {
930*7c478bd9Sstevel@tonic-gate #ifdef C2_AUDIT
931*7c478bd9Sstevel@tonic-gate 			if (audit_active)
932*7c478bd9Sstevel@tonic-gate 				audit_ipcget(NULL, (void *)perm);
933*7c478bd9Sstevel@tonic-gate #endif
934*7c478bd9Sstevel@tonic-gate 			return (EACCES);
935*7c478bd9Sstevel@tonic-gate 		}
936*7c478bd9Sstevel@tonic-gate 		*permp = perm;
937*7c478bd9Sstevel@tonic-gate 		return (0);
938*7c478bd9Sstevel@tonic-gate 	} else if (flag & IPC_CREAT) {
939*7c478bd9Sstevel@tonic-gate 		*permp = NULL;
940*7c478bd9Sstevel@tonic-gate 		return (0);
941*7c478bd9Sstevel@tonic-gate 	}
942*7c478bd9Sstevel@tonic-gate 	return (ENOENT);
943*7c478bd9Sstevel@tonic-gate }
944*7c478bd9Sstevel@tonic-gate 
945*7c478bd9Sstevel@tonic-gate static int
946*7c478bd9Sstevel@tonic-gate ipc_alloc_test(ipc_service_t *service, proc_t *pp)
947*7c478bd9Sstevel@tonic-gate {
948*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&service->ipcs_lock));
949*7c478bd9Sstevel@tonic-gate 
950*7c478bd9Sstevel@tonic-gate 	/*
951*7c478bd9Sstevel@tonic-gate 	 * Resizing the table first would result in a cleaner code
952*7c478bd9Sstevel@tonic-gate 	 * path, but would also allow a user to (permanently) double
953*7c478bd9Sstevel@tonic-gate 	 * the id table size in cases where the allocation would be
954*7c478bd9Sstevel@tonic-gate 	 * denied.  Hence we test the rctl first.
955*7c478bd9Sstevel@tonic-gate 	 */
956*7c478bd9Sstevel@tonic-gate retry:
957*7c478bd9Sstevel@tonic-gate 	mutex_enter(&pp->p_lock);
958*7c478bd9Sstevel@tonic-gate 	if (rctl_test(service->ipcs_rctl, pp->p_task->tk_proj->kpj_rctls, pp,
959*7c478bd9Sstevel@tonic-gate 	    1, RCA_SAFE) & RCT_DENY) {
960*7c478bd9Sstevel@tonic-gate 		mutex_exit(&pp->p_lock);
961*7c478bd9Sstevel@tonic-gate 		return (ENOSPC);
962*7c478bd9Sstevel@tonic-gate 	}
963*7c478bd9Sstevel@tonic-gate 
964*7c478bd9Sstevel@tonic-gate 	if (service->ipcs_count == service->ipcs_tabsz) {
965*7c478bd9Sstevel@tonic-gate 		int error;
966*7c478bd9Sstevel@tonic-gate 
967*7c478bd9Sstevel@tonic-gate 		mutex_exit(&pp->p_lock);
968*7c478bd9Sstevel@tonic-gate 		if (error = ipc_grow(service))
969*7c478bd9Sstevel@tonic-gate 			return (error);
970*7c478bd9Sstevel@tonic-gate 		goto retry;
971*7c478bd9Sstevel@tonic-gate 	}
972*7c478bd9Sstevel@tonic-gate 
973*7c478bd9Sstevel@tonic-gate 	return (0);
974*7c478bd9Sstevel@tonic-gate }
975*7c478bd9Sstevel@tonic-gate 
976*7c478bd9Sstevel@tonic-gate /*
977*7c478bd9Sstevel@tonic-gate  * Given a key, search for or create the associated identifier.
978*7c478bd9Sstevel@tonic-gate  *
979*7c478bd9Sstevel@tonic-gate  * If IPC_CREAT is specified and the key isn't found, or if the key is
980*7c478bd9Sstevel@tonic-gate  * equal to IPC_PRIVATE, we return 0 and place a pointer to a newly
981*7c478bd9Sstevel@tonic-gate  * allocated object structure in permp.  A pointer to the held service
982*7c478bd9Sstevel@tonic-gate  * lock is placed in lockp.  ipc_mode's IPC_ALLOC bit is clear.
983*7c478bd9Sstevel@tonic-gate  *
984*7c478bd9Sstevel@tonic-gate  * If the key is found and no error conditions arise, we return 0 and
985*7c478bd9Sstevel@tonic-gate  * place a pointer to the existing object structure in permp.  A
986*7c478bd9Sstevel@tonic-gate  * pointer to the held ID lock is placed in lockp.  ipc_mode's
987*7c478bd9Sstevel@tonic-gate  * IPC_ALLOC bit is set.
988*7c478bd9Sstevel@tonic-gate  *
989*7c478bd9Sstevel@tonic-gate  * Otherwise, a non-zero errno value is returned.
990*7c478bd9Sstevel@tonic-gate  */
991*7c478bd9Sstevel@tonic-gate int
992*7c478bd9Sstevel@tonic-gate ipc_get(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp,
993*7c478bd9Sstevel@tonic-gate     kmutex_t **lockp)
994*7c478bd9Sstevel@tonic-gate {
995*7c478bd9Sstevel@tonic-gate 	kipc_perm_t	*perm = NULL;
996*7c478bd9Sstevel@tonic-gate 	proc_t		*pp = curproc;
997*7c478bd9Sstevel@tonic-gate 	int		error, index;
998*7c478bd9Sstevel@tonic-gate 	cred_t		*cr = CRED();
999*7c478bd9Sstevel@tonic-gate 
1000*7c478bd9Sstevel@tonic-gate 	if (key != IPC_PRIVATE) {
1001*7c478bd9Sstevel@tonic-gate 
1002*7c478bd9Sstevel@tonic-gate 		mutex_enter(&service->ipcs_lock);
1003*7c478bd9Sstevel@tonic-gate 		error = ipc_keylookup(service, key, flag, &perm);
1004*7c478bd9Sstevel@tonic-gate 		if (perm != NULL)
1005*7c478bd9Sstevel@tonic-gate 			index = ipc_lock_internal(service, perm->ipc_id);
1006*7c478bd9Sstevel@tonic-gate 		mutex_exit(&service->ipcs_lock);
1007*7c478bd9Sstevel@tonic-gate 
1008*7c478bd9Sstevel@tonic-gate 		if (error) {
1009*7c478bd9Sstevel@tonic-gate 			ASSERT(perm == NULL);
1010*7c478bd9Sstevel@tonic-gate 			return (error);
1011*7c478bd9Sstevel@tonic-gate 		}
1012*7c478bd9Sstevel@tonic-gate 
1013*7c478bd9Sstevel@tonic-gate 		if (perm) {
1014*7c478bd9Sstevel@tonic-gate 			ASSERT(!IPC_FREE(perm));
1015*7c478bd9Sstevel@tonic-gate 			*permp = perm;
1016*7c478bd9Sstevel@tonic-gate 			*lockp = &service->ipcs_table[index].ipct_lock;
1017*7c478bd9Sstevel@tonic-gate 			return (0);
1018*7c478bd9Sstevel@tonic-gate 		}
1019*7c478bd9Sstevel@tonic-gate 
1020*7c478bd9Sstevel@tonic-gate 		/* Key not found; fall through */
1021*7c478bd9Sstevel@tonic-gate 	}
1022*7c478bd9Sstevel@tonic-gate 
1023*7c478bd9Sstevel@tonic-gate 	perm = kmem_zalloc(service->ipcs_ssize, KM_SLEEP);
1024*7c478bd9Sstevel@tonic-gate 
1025*7c478bd9Sstevel@tonic-gate 	mutex_enter(&service->ipcs_lock);
1026*7c478bd9Sstevel@tonic-gate 	if (error = ipc_alloc_test(service, pp)) {
1027*7c478bd9Sstevel@tonic-gate 		mutex_exit(&service->ipcs_lock);
1028*7c478bd9Sstevel@tonic-gate 		kmem_free(perm, service->ipcs_ssize);
1029*7c478bd9Sstevel@tonic-gate 		return (error);
1030*7c478bd9Sstevel@tonic-gate 	}
1031*7c478bd9Sstevel@tonic-gate 
1032*7c478bd9Sstevel@tonic-gate 	perm->ipc_cuid = perm->ipc_uid = crgetuid(cr);
1033*7c478bd9Sstevel@tonic-gate 	perm->ipc_cgid = perm->ipc_gid = crgetgid(cr);
1034*7c478bd9Sstevel@tonic-gate 	perm->ipc_zoneid = getzoneid();
1035*7c478bd9Sstevel@tonic-gate 	perm->ipc_mode = flag & 0777;
1036*7c478bd9Sstevel@tonic-gate 	perm->ipc_key = key;
1037*7c478bd9Sstevel@tonic-gate 	perm->ipc_ref = 1;
1038*7c478bd9Sstevel@tonic-gate 	perm->ipc_id = IPC_ID_INVAL;
1039*7c478bd9Sstevel@tonic-gate 	*permp = perm;
1040*7c478bd9Sstevel@tonic-gate 	*lockp = &service->ipcs_lock;
1041*7c478bd9Sstevel@tonic-gate 
1042*7c478bd9Sstevel@tonic-gate 	return (0);
1043*7c478bd9Sstevel@tonic-gate }
1044*7c478bd9Sstevel@tonic-gate 
1045*7c478bd9Sstevel@tonic-gate /*
1046*7c478bd9Sstevel@tonic-gate  * Attempts to add the a newly created ID to the global namespace.  If
1047*7c478bd9Sstevel@tonic-gate  * creating it would cause an error, we return the error.  If there is
1048*7c478bd9Sstevel@tonic-gate  * the possibility that we could obtain the existing ID and return it
1049*7c478bd9Sstevel@tonic-gate  * to the user, we return EAGAIN.  Otherwise, we return 0 with p_lock
1050*7c478bd9Sstevel@tonic-gate  * and the service lock held.
1051*7c478bd9Sstevel@tonic-gate  *
1052*7c478bd9Sstevel@tonic-gate  * Since this should be only called after all initialization has been
1053*7c478bd9Sstevel@tonic-gate  * completed, on failure we automatically invoke the destructor for the
1054*7c478bd9Sstevel@tonic-gate  * object and deallocate the memory associated with it.
1055*7c478bd9Sstevel@tonic-gate  */
1056*7c478bd9Sstevel@tonic-gate int
1057*7c478bd9Sstevel@tonic-gate ipc_commit_begin(ipc_service_t *service, key_t key, int flag,
1058*7c478bd9Sstevel@tonic-gate     kipc_perm_t *newperm)
1059*7c478bd9Sstevel@tonic-gate {
1060*7c478bd9Sstevel@tonic-gate 	kipc_perm_t *perm;
1061*7c478bd9Sstevel@tonic-gate 	int error;
1062*7c478bd9Sstevel@tonic-gate 	proc_t *pp = curproc;
1063*7c478bd9Sstevel@tonic-gate 
1064*7c478bd9Sstevel@tonic-gate 	ASSERT(newperm->ipc_ref == 1);
1065*7c478bd9Sstevel@tonic-gate 	ASSERT(IPC_FREE(newperm));
1066*7c478bd9Sstevel@tonic-gate 
1067*7c478bd9Sstevel@tonic-gate 	mutex_enter(&service->ipcs_lock);
1068*7c478bd9Sstevel@tonic-gate 	/*
1069*7c478bd9Sstevel@tonic-gate 	 * Ensure that no-one has raced with us and created the key.
1070*7c478bd9Sstevel@tonic-gate 	 */
1071*7c478bd9Sstevel@tonic-gate 	if ((key != IPC_PRIVATE) &&
1072*7c478bd9Sstevel@tonic-gate 	    (((error = ipc_keylookup(service, key, flag, &perm)) != 0) ||
1073*7c478bd9Sstevel@tonic-gate 	    (perm != NULL))) {
1074*7c478bd9Sstevel@tonic-gate 		error = error ? error : EAGAIN;
1075*7c478bd9Sstevel@tonic-gate 		goto errout;
1076*7c478bd9Sstevel@tonic-gate 	}
1077*7c478bd9Sstevel@tonic-gate 
1078*7c478bd9Sstevel@tonic-gate 	/*
1079*7c478bd9Sstevel@tonic-gate 	 * Ensure that no-one has raced with us and used the last of
1080*7c478bd9Sstevel@tonic-gate 	 * the permissible ids, or the last of the free spaces in the
1081*7c478bd9Sstevel@tonic-gate 	 * id table.
1082*7c478bd9Sstevel@tonic-gate 	 */
1083*7c478bd9Sstevel@tonic-gate 	if (error = ipc_alloc_test(service, pp))
1084*7c478bd9Sstevel@tonic-gate 		goto errout;
1085*7c478bd9Sstevel@tonic-gate 
1086*7c478bd9Sstevel@tonic-gate 	/*
1087*7c478bd9Sstevel@tonic-gate 	 * Set ipc_proj so ipc_cleanup cleans up necessary state.
1088*7c478bd9Sstevel@tonic-gate 	 */
1089*7c478bd9Sstevel@tonic-gate 	newperm->ipc_proj = pp->p_task->tk_proj;
1090*7c478bd9Sstevel@tonic-gate 
1091*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&service->ipcs_lock));
1092*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pp->p_lock));
1093*7c478bd9Sstevel@tonic-gate 
1094*7c478bd9Sstevel@tonic-gate 	return (0);
1095*7c478bd9Sstevel@tonic-gate errout:
1096*7c478bd9Sstevel@tonic-gate 	mutex_exit(&service->ipcs_lock);
1097*7c478bd9Sstevel@tonic-gate 	service->ipcs_dtor(newperm);
1098*7c478bd9Sstevel@tonic-gate 	kmem_free(newperm, service->ipcs_ssize);
1099*7c478bd9Sstevel@tonic-gate 	return (error);
1100*7c478bd9Sstevel@tonic-gate }
1101*7c478bd9Sstevel@tonic-gate 
1102*7c478bd9Sstevel@tonic-gate /*
1103*7c478bd9Sstevel@tonic-gate  * Commit the ID allocation transaction.  Called with p_lock and the
1104*7c478bd9Sstevel@tonic-gate  * service lock held, both of which are dropped.  Returns the held ID
1105*7c478bd9Sstevel@tonic-gate  * lock so the caller can extract the ID and perform ipcget auditing.
1106*7c478bd9Sstevel@tonic-gate  */
1107*7c478bd9Sstevel@tonic-gate kmutex_t *
1108*7c478bd9Sstevel@tonic-gate ipc_commit_end(ipc_service_t *service, kipc_perm_t *perm)
1109*7c478bd9Sstevel@tonic-gate {
1110*7c478bd9Sstevel@tonic-gate 	ipc_slot_t *slot;
1111*7c478bd9Sstevel@tonic-gate 	avl_index_t where;
1112*7c478bd9Sstevel@tonic-gate 	int index;
1113*7c478bd9Sstevel@tonic-gate 	void *loc;
1114*7c478bd9Sstevel@tonic-gate 
1115*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&service->ipcs_lock));
1116*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&curproc->p_lock));
1117*7c478bd9Sstevel@tonic-gate 
1118*7c478bd9Sstevel@tonic-gate 	(void) project_hold(perm->ipc_proj);
1119*7c478bd9Sstevel@tonic-gate 	mutex_exit(&curproc->p_lock);
1120*7c478bd9Sstevel@tonic-gate 
1121*7c478bd9Sstevel@tonic-gate 	/*
1122*7c478bd9Sstevel@tonic-gate 	 * Pick out our slot.
1123*7c478bd9Sstevel@tonic-gate 	 */
1124*7c478bd9Sstevel@tonic-gate 	service->ipcs_count++;
1125*7c478bd9Sstevel@tonic-gate 	index = id_alloc(service->ipcs_ids);
1126*7c478bd9Sstevel@tonic-gate 	ASSERT(index < service->ipcs_tabsz);
1127*7c478bd9Sstevel@tonic-gate 	slot = &service->ipcs_table[index];
1128*7c478bd9Sstevel@tonic-gate 	mutex_enter(&slot->ipct_lock);
1129*7c478bd9Sstevel@tonic-gate 	ASSERT(slot->ipct_data == NULL);
1130*7c478bd9Sstevel@tonic-gate 
1131*7c478bd9Sstevel@tonic-gate 	/*
1132*7c478bd9Sstevel@tonic-gate 	 * Update the perm structure.
1133*7c478bd9Sstevel@tonic-gate 	 */
1134*7c478bd9Sstevel@tonic-gate 	perm->ipc_mode |= IPC_ALLOC;
1135*7c478bd9Sstevel@tonic-gate 	perm->ipc_id = (slot->ipct_seq << IPC_SEQ_SHIFT) | index;
1136*7c478bd9Sstevel@tonic-gate 
1137*7c478bd9Sstevel@tonic-gate 	/*
1138*7c478bd9Sstevel@tonic-gate 	 * Push into global visibility.
1139*7c478bd9Sstevel@tonic-gate 	 */
1140*7c478bd9Sstevel@tonic-gate 	slot->ipct_data = perm;
1141*7c478bd9Sstevel@tonic-gate 	if (perm->ipc_key != IPC_PRIVATE) {
1142*7c478bd9Sstevel@tonic-gate 		loc = avl_find(&service->ipcs_keys, perm, &where);
1143*7c478bd9Sstevel@tonic-gate 		ASSERT(loc == NULL);
1144*7c478bd9Sstevel@tonic-gate 		avl_insert(&service->ipcs_keys, perm, where);
1145*7c478bd9Sstevel@tonic-gate 	}
1146*7c478bd9Sstevel@tonic-gate 	list_insert_head(&service->ipcs_usedids, perm);
1147*7c478bd9Sstevel@tonic-gate 
1148*7c478bd9Sstevel@tonic-gate 	/*
1149*7c478bd9Sstevel@tonic-gate 	 * Update resource consumption.
1150*7c478bd9Sstevel@tonic-gate 	 */
1151*7c478bd9Sstevel@tonic-gate 	IPC_USAGE(perm, service) += 1;
1152*7c478bd9Sstevel@tonic-gate 
1153*7c478bd9Sstevel@tonic-gate 	mutex_exit(&service->ipcs_lock);
1154*7c478bd9Sstevel@tonic-gate 	return (&slot->ipct_lock);
1155*7c478bd9Sstevel@tonic-gate }
1156*7c478bd9Sstevel@tonic-gate 
1157*7c478bd9Sstevel@tonic-gate /*
1158*7c478bd9Sstevel@tonic-gate  * Clean up function, in case the allocation fails.  If called between
1159*7c478bd9Sstevel@tonic-gate  * ipc_lookup and ipc_commit_begin, perm->ipc_proj will be 0 and we
1160*7c478bd9Sstevel@tonic-gate  * merely free the perm structure.  If called after ipc_commit_begin,
1161*7c478bd9Sstevel@tonic-gate  * we also drop locks and call the ID's destructor.
1162*7c478bd9Sstevel@tonic-gate  */
1163*7c478bd9Sstevel@tonic-gate void
1164*7c478bd9Sstevel@tonic-gate ipc_cleanup(ipc_service_t *service, kipc_perm_t *perm)
1165*7c478bd9Sstevel@tonic-gate {
1166*7c478bd9Sstevel@tonic-gate 	ASSERT(IPC_FREE(perm));
1167*7c478bd9Sstevel@tonic-gate 	if (perm->ipc_proj) {
1168*7c478bd9Sstevel@tonic-gate 		mutex_exit(&curproc->p_lock);
1169*7c478bd9Sstevel@tonic-gate 		mutex_exit(&service->ipcs_lock);
1170*7c478bd9Sstevel@tonic-gate 		service->ipcs_dtor(perm);
1171*7c478bd9Sstevel@tonic-gate 	}
1172*7c478bd9Sstevel@tonic-gate 	kmem_free(perm, service->ipcs_ssize);
1173*7c478bd9Sstevel@tonic-gate }
1174*7c478bd9Sstevel@tonic-gate 
1175*7c478bd9Sstevel@tonic-gate 
1176*7c478bd9Sstevel@tonic-gate /*
1177*7c478bd9Sstevel@tonic-gate  * Common code to remove an IPC object.  This should be called after
1178*7c478bd9Sstevel@tonic-gate  * all permissions checks have been performed, and with the service
1179*7c478bd9Sstevel@tonic-gate  * and ID locked.  Note that this does not remove the object from
1180*7c478bd9Sstevel@tonic-gate  * the ipcs_usedids list (this needs to be done by the caller before
1181*7c478bd9Sstevel@tonic-gate  * dropping the service lock).
1182*7c478bd9Sstevel@tonic-gate  */
1183*7c478bd9Sstevel@tonic-gate static void
1184*7c478bd9Sstevel@tonic-gate ipc_remove(ipc_service_t *service, kipc_perm_t *perm)
1185*7c478bd9Sstevel@tonic-gate {
1186*7c478bd9Sstevel@tonic-gate 	int id = perm->ipc_id;
1187*7c478bd9Sstevel@tonic-gate 	int index;
1188*7c478bd9Sstevel@tonic-gate 
1189*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&service->ipcs_lock));
1190*7c478bd9Sstevel@tonic-gate 	ASSERT(IPC_LOCKED(service, perm));
1191*7c478bd9Sstevel@tonic-gate 
1192*7c478bd9Sstevel@tonic-gate 	index = IPC_INDEX(id);
1193*7c478bd9Sstevel@tonic-gate 
1194*7c478bd9Sstevel@tonic-gate 	service->ipcs_table[index].ipct_data = NULL;
1195*7c478bd9Sstevel@tonic-gate 
1196*7c478bd9Sstevel@tonic-gate 	if (perm->ipc_key != IPC_PRIVATE)
1197*7c478bd9Sstevel@tonic-gate 		avl_remove(&service->ipcs_keys, perm);
1198*7c478bd9Sstevel@tonic-gate 	list_remove(&service->ipcs_usedids, perm);
1199*7c478bd9Sstevel@tonic-gate 	perm->ipc_mode &= ~IPC_ALLOC;
1200*7c478bd9Sstevel@tonic-gate 
1201*7c478bd9Sstevel@tonic-gate 	id_free(service->ipcs_ids, index);
1202*7c478bd9Sstevel@tonic-gate 
1203*7c478bd9Sstevel@tonic-gate 	if (service->ipcs_table[index].ipct_seq++ == IPC_SEQ_MASK)
1204*7c478bd9Sstevel@tonic-gate 		service->ipcs_table[index].ipct_seq = 0;
1205*7c478bd9Sstevel@tonic-gate 	service->ipcs_count--;
1206*7c478bd9Sstevel@tonic-gate 	ASSERT(IPC_USAGE(perm, service) > 0);
1207*7c478bd9Sstevel@tonic-gate 	IPC_USAGE(perm, service) -= 1;
1208*7c478bd9Sstevel@tonic-gate 	ASSERT(service->ipcs_count || (IPC_USAGE(perm, service) == 0));
1209*7c478bd9Sstevel@tonic-gate }
1210*7c478bd9Sstevel@tonic-gate 
1211*7c478bd9Sstevel@tonic-gate 
1212*7c478bd9Sstevel@tonic-gate /*
1213*7c478bd9Sstevel@tonic-gate  * Common code to perform an IPC_RMID.  Returns an errno value on
1214*7c478bd9Sstevel@tonic-gate  * failure, 0 on success.
1215*7c478bd9Sstevel@tonic-gate  */
1216*7c478bd9Sstevel@tonic-gate int
1217*7c478bd9Sstevel@tonic-gate ipc_rmid(ipc_service_t *service, int id, cred_t *cr)
1218*7c478bd9Sstevel@tonic-gate {
1219*7c478bd9Sstevel@tonic-gate 	kipc_perm_t *perm;
1220*7c478bd9Sstevel@tonic-gate 	kmutex_t *lock;
1221*7c478bd9Sstevel@tonic-gate 
1222*7c478bd9Sstevel@tonic-gate 	mutex_enter(&service->ipcs_lock);
1223*7c478bd9Sstevel@tonic-gate 
1224*7c478bd9Sstevel@tonic-gate 	lock = ipc_lookup(service, id, &perm);
1225*7c478bd9Sstevel@tonic-gate 	if (lock == NULL) {
1226*7c478bd9Sstevel@tonic-gate 		mutex_exit(&service->ipcs_lock);
1227*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
1228*7c478bd9Sstevel@tonic-gate 	}
1229*7c478bd9Sstevel@tonic-gate 
1230*7c478bd9Sstevel@tonic-gate 	ASSERT(service->ipcs_count > 0);
1231*7c478bd9Sstevel@tonic-gate 
1232*7c478bd9Sstevel@tonic-gate 	if (secpolicy_ipc_owner(cr, perm) != 0) {
1233*7c478bd9Sstevel@tonic-gate 		mutex_exit(lock);
1234*7c478bd9Sstevel@tonic-gate 		mutex_exit(&service->ipcs_lock);
1235*7c478bd9Sstevel@tonic-gate 		return (EPERM);
1236*7c478bd9Sstevel@tonic-gate 	}
1237*7c478bd9Sstevel@tonic-gate 
1238*7c478bd9Sstevel@tonic-gate 	/*
1239*7c478bd9Sstevel@tonic-gate 	 * Nothing can fail from this point on.
1240*7c478bd9Sstevel@tonic-gate 	 */
1241*7c478bd9Sstevel@tonic-gate 	ipc_remove(service, perm);
1242*7c478bd9Sstevel@tonic-gate 	mutex_exit(&service->ipcs_lock);
1243*7c478bd9Sstevel@tonic-gate 
1244*7c478bd9Sstevel@tonic-gate 	/* perform any per-service removal actions */
1245*7c478bd9Sstevel@tonic-gate 	service->ipcs_rmid(perm);
1246*7c478bd9Sstevel@tonic-gate 
1247*7c478bd9Sstevel@tonic-gate 	ipc_rele(service, perm);
1248*7c478bd9Sstevel@tonic-gate 
1249*7c478bd9Sstevel@tonic-gate 	return (0);
1250*7c478bd9Sstevel@tonic-gate }
1251*7c478bd9Sstevel@tonic-gate 
1252*7c478bd9Sstevel@tonic-gate /*
1253*7c478bd9Sstevel@tonic-gate  * Implementation for shmids, semids, and msgids.  buf is the address
1254*7c478bd9Sstevel@tonic-gate  * of the user buffer, nids is the size, and pnids is a pointer to
1255*7c478bd9Sstevel@tonic-gate  * where we write the actual number of ids that [would] have been
1256*7c478bd9Sstevel@tonic-gate  * copied out.
1257*7c478bd9Sstevel@tonic-gate  */
1258*7c478bd9Sstevel@tonic-gate int
1259*7c478bd9Sstevel@tonic-gate ipc_ids(ipc_service_t *service, int *buf, uint_t nids, uint_t *pnids)
1260*7c478bd9Sstevel@tonic-gate {
1261*7c478bd9Sstevel@tonic-gate 	kipc_perm_t *perm;
1262*7c478bd9Sstevel@tonic-gate 	size_t	idsize = 0;
1263*7c478bd9Sstevel@tonic-gate 	int	error = 0;
1264*7c478bd9Sstevel@tonic-gate 	int	idcount;
1265*7c478bd9Sstevel@tonic-gate 	int	*ids;
1266*7c478bd9Sstevel@tonic-gate 	int	numids = 0;
1267*7c478bd9Sstevel@tonic-gate 	zoneid_t zoneid = getzoneid();
1268*7c478bd9Sstevel@tonic-gate 	int	global = INGLOBALZONE(curproc);
1269*7c478bd9Sstevel@tonic-gate 
1270*7c478bd9Sstevel@tonic-gate 	if (buf == NULL)
1271*7c478bd9Sstevel@tonic-gate 		nids = 0;
1272*7c478bd9Sstevel@tonic-gate 
1273*7c478bd9Sstevel@tonic-gate 	/*
1274*7c478bd9Sstevel@tonic-gate 	 * Get an accurate count of the total number of ids, and allocate a
1275*7c478bd9Sstevel@tonic-gate 	 * staging buffer.  Since ipcs_count is always sane, we don't have
1276*7c478bd9Sstevel@tonic-gate 	 * to take ipcs_lock for our first guess.  If there are no ids, or
1277*7c478bd9Sstevel@tonic-gate 	 * we're in the global zone and the number of ids is greater than
1278*7c478bd9Sstevel@tonic-gate 	 * the size of the specified buffer, we shunt to the end.  Otherwise,
1279*7c478bd9Sstevel@tonic-gate 	 * we go through the id list looking for (and counting) what is
1280*7c478bd9Sstevel@tonic-gate 	 * visible in the specified zone.
1281*7c478bd9Sstevel@tonic-gate 	 */
1282*7c478bd9Sstevel@tonic-gate 	idcount = service->ipcs_count;
1283*7c478bd9Sstevel@tonic-gate 	for (;;) {
1284*7c478bd9Sstevel@tonic-gate 		if ((global && idcount > nids) || idcount == 0) {
1285*7c478bd9Sstevel@tonic-gate 			numids = idcount;
1286*7c478bd9Sstevel@tonic-gate 			nids = 0;
1287*7c478bd9Sstevel@tonic-gate 			goto out;
1288*7c478bd9Sstevel@tonic-gate 		}
1289*7c478bd9Sstevel@tonic-gate 
1290*7c478bd9Sstevel@tonic-gate 		idsize = idcount * sizeof (int);
1291*7c478bd9Sstevel@tonic-gate 		ids = kmem_alloc(idsize, KM_SLEEP);
1292*7c478bd9Sstevel@tonic-gate 
1293*7c478bd9Sstevel@tonic-gate 		mutex_enter(&service->ipcs_lock);
1294*7c478bd9Sstevel@tonic-gate 		if (idcount >= service->ipcs_count)
1295*7c478bd9Sstevel@tonic-gate 			break;
1296*7c478bd9Sstevel@tonic-gate 		idcount = service->ipcs_count;
1297*7c478bd9Sstevel@tonic-gate 		mutex_exit(&service->ipcs_lock);
1298*7c478bd9Sstevel@tonic-gate 
1299*7c478bd9Sstevel@tonic-gate 		if (idsize != 0) {
1300*7c478bd9Sstevel@tonic-gate 			kmem_free(ids, idsize);
1301*7c478bd9Sstevel@tonic-gate 			idsize = 0;
1302*7c478bd9Sstevel@tonic-gate 		}
1303*7c478bd9Sstevel@tonic-gate 	}
1304*7c478bd9Sstevel@tonic-gate 
1305*7c478bd9Sstevel@tonic-gate 	for (perm = list_head(&service->ipcs_usedids); perm != NULL;
1306*7c478bd9Sstevel@tonic-gate 	    perm = list_next(&service->ipcs_usedids, perm)) {
1307*7c478bd9Sstevel@tonic-gate 		ASSERT(!IPC_FREE(perm));
1308*7c478bd9Sstevel@tonic-gate 		if (global || perm->ipc_zoneid == zoneid)
1309*7c478bd9Sstevel@tonic-gate 			ids[numids++] = perm->ipc_id;
1310*7c478bd9Sstevel@tonic-gate 	}
1311*7c478bd9Sstevel@tonic-gate 	mutex_exit(&service->ipcs_lock);
1312*7c478bd9Sstevel@tonic-gate 
1313*7c478bd9Sstevel@tonic-gate 	/*
1314*7c478bd9Sstevel@tonic-gate 	 * If there isn't enough space to hold all of the ids, just
1315*7c478bd9Sstevel@tonic-gate 	 * return the number of ids without copying out any of them.
1316*7c478bd9Sstevel@tonic-gate 	 */
1317*7c478bd9Sstevel@tonic-gate 	if (nids < numids)
1318*7c478bd9Sstevel@tonic-gate 		nids = 0;
1319*7c478bd9Sstevel@tonic-gate 
1320*7c478bd9Sstevel@tonic-gate out:
1321*7c478bd9Sstevel@tonic-gate 	if (suword32(pnids, (uint32_t)numids) ||
1322*7c478bd9Sstevel@tonic-gate 	    (nids != 0 && copyout(ids, buf, numids * sizeof (int))))
1323*7c478bd9Sstevel@tonic-gate 		error = EFAULT;
1324*7c478bd9Sstevel@tonic-gate 	if (idsize != 0)
1325*7c478bd9Sstevel@tonic-gate 		kmem_free(ids, idsize);
1326*7c478bd9Sstevel@tonic-gate 	return (error);
1327*7c478bd9Sstevel@tonic-gate }
1328*7c478bd9Sstevel@tonic-gate 
1329*7c478bd9Sstevel@tonic-gate /*
1330*7c478bd9Sstevel@tonic-gate  * Destroy IPC objects from the given service that are associated with
1331*7c478bd9Sstevel@tonic-gate  * the given zone.
1332*7c478bd9Sstevel@tonic-gate  *
1333*7c478bd9Sstevel@tonic-gate  * We can't hold on to the service lock when freeing objects, so we
1334*7c478bd9Sstevel@tonic-gate  * first search the service and move all the objects to a private
1335*7c478bd9Sstevel@tonic-gate  * list, then walk through and free them after dropping the lock.
1336*7c478bd9Sstevel@tonic-gate  */
1337*7c478bd9Sstevel@tonic-gate void
1338*7c478bd9Sstevel@tonic-gate ipc_remove_zone(ipc_service_t *service, zoneid_t zoneid)
1339*7c478bd9Sstevel@tonic-gate {
1340*7c478bd9Sstevel@tonic-gate 	kipc_perm_t *perm, *next;
1341*7c478bd9Sstevel@tonic-gate 	list_t rmlist;
1342*7c478bd9Sstevel@tonic-gate 	kmutex_t *lock;
1343*7c478bd9Sstevel@tonic-gate 
1344*7c478bd9Sstevel@tonic-gate 	list_create(&rmlist, sizeof (kipc_perm_t),
1345*7c478bd9Sstevel@tonic-gate 	    offsetof(kipc_perm_t, ipc_list));
1346*7c478bd9Sstevel@tonic-gate 
1347*7c478bd9Sstevel@tonic-gate 	mutex_enter(&service->ipcs_lock);
1348*7c478bd9Sstevel@tonic-gate 	for (perm = list_head(&service->ipcs_usedids); perm != NULL;
1349*7c478bd9Sstevel@tonic-gate 	    perm = next) {
1350*7c478bd9Sstevel@tonic-gate 		next = list_next(&service->ipcs_usedids, perm);
1351*7c478bd9Sstevel@tonic-gate 		if (perm->ipc_zoneid != zoneid)
1352*7c478bd9Sstevel@tonic-gate 			continue;
1353*7c478bd9Sstevel@tonic-gate 
1354*7c478bd9Sstevel@tonic-gate 		/*
1355*7c478bd9Sstevel@tonic-gate 		 * Remove the object from the service, then put it on
1356*7c478bd9Sstevel@tonic-gate 		 * the removal list so we can defer the call to
1357*7c478bd9Sstevel@tonic-gate 		 * ipc_rele (which will actually free the structure).
1358*7c478bd9Sstevel@tonic-gate 		 * We need to do this since the destructor may grab
1359*7c478bd9Sstevel@tonic-gate 		 * the service lock.
1360*7c478bd9Sstevel@tonic-gate 		 */
1361*7c478bd9Sstevel@tonic-gate 		ASSERT(!IPC_FREE(perm));
1362*7c478bd9Sstevel@tonic-gate 		lock = ipc_lock(service, perm->ipc_id);
1363*7c478bd9Sstevel@tonic-gate 		ipc_remove(service, perm);
1364*7c478bd9Sstevel@tonic-gate 		mutex_exit(lock);
1365*7c478bd9Sstevel@tonic-gate 		list_insert_tail(&rmlist, perm);
1366*7c478bd9Sstevel@tonic-gate 	}
1367*7c478bd9Sstevel@tonic-gate 	mutex_exit(&service->ipcs_lock);
1368*7c478bd9Sstevel@tonic-gate 
1369*7c478bd9Sstevel@tonic-gate 	/*
1370*7c478bd9Sstevel@tonic-gate 	 * Now that we've dropped the service lock, loop through the
1371*7c478bd9Sstevel@tonic-gate 	 * private list freeing removed objects.
1372*7c478bd9Sstevel@tonic-gate 	 */
1373*7c478bd9Sstevel@tonic-gate 	for (perm = list_head(&rmlist); perm != NULL; perm = next) {
1374*7c478bd9Sstevel@tonic-gate 		next = list_next(&rmlist, perm);
1375*7c478bd9Sstevel@tonic-gate 		list_remove(&rmlist, perm);
1376*7c478bd9Sstevel@tonic-gate 
1377*7c478bd9Sstevel@tonic-gate 		(void) ipc_lock(service, perm->ipc_id);
1378*7c478bd9Sstevel@tonic-gate 
1379*7c478bd9Sstevel@tonic-gate 		/* perform any per-service removal actions */
1380*7c478bd9Sstevel@tonic-gate 		service->ipcs_rmid(perm);
1381*7c478bd9Sstevel@tonic-gate 
1382*7c478bd9Sstevel@tonic-gate 		/* release reference */
1383*7c478bd9Sstevel@tonic-gate 		ipc_rele(service, perm);
1384*7c478bd9Sstevel@tonic-gate 	}
1385*7c478bd9Sstevel@tonic-gate 
1386*7c478bd9Sstevel@tonic-gate 	list_destroy(&rmlist);
1387*7c478bd9Sstevel@tonic-gate }
1388