xref: /titanic_52/usr/src/uts/common/os/ipc.c (revision c2580b931007758eab8cb5ae8726ebe1588e259b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  * Common Inter-Process Communication routines.
34  *
35  * Overview
36  * --------
37  *
38  * The System V inter-process communication (IPC) facilities provide
39  * three services, message queues, semaphore arrays, and shared memory
40  * segments, which are managed using filesystem-like namespaces.
41  * Unlike a filesystem, these namespaces aren't mounted and accessible
42  * via a path -- a special API is used to interact with the different
43  * facilities (nothing precludes a VFS-based interface, but the
44  * standards require the special APIs).  Furthermore, these special
45  * APIs don't use file descriptors, nor do they have an equivalent.
46  * This means that every operation which acts on an object needs to
47  * perform the equivalent of a lookup, which in turn means that every
48  * operation can fail if the specified object doesn't exist in the
49  * facility's namespace.
50  *
51  * Objects
52  * -------
53  *
54  * Each object in a namespace has a unique ID, which is assigned by the
55  * system and is used to identify the object when performing operations
56  * on it.  An object can also have a key, which is selected by the user
57  * at allocation time and is used as a primitive rendezvous mechanism.
58  * An object without a key is said to have a "private" key.
59  *
60  * To perform an operation on an object given its key, one must first
61  * perform a lookup and obtain its ID.  The ID is then used to identify
62  * the object when performing the operation.  If the object has a
63  * private key, the ID must be known or obtained by other means.
64  *
65  * Each object in the namespace has a creator uid and gid, as well as
66  * an owner uid and gid.  Both are initialized with the ruid and rgid
67  * of the process which created the object.  The creator or current
68  * owner has the ability to change the owner of the object.
69  *
70  * Each object in the namespace has a set of file-like permissions,
71  * which, in conjunction with the creator and owner uid and gid,
72  * control read and write access to the object (execute is ignored).
73  *
74  * Each object also has a creator project and zone, which are used to
75  * account for its resource usage.
76  *
77  * Operations
78  * ----------
79  *
80  * There are five operations which all three facilities have in
81  * common: GET, SET, STAT, RMID, and IDS.
82  *
83  * GET, like open, is used to allocate a new object or obtain an
84  * existing one (using its key).  It takes a key, a set of flags and
85  * mode bits, and optionally facility-specific arguments.  If the key
86  * is IPC_PRIVATE, a new object with the requested mode bits and
87  * facility-specific attributes is created.  If the key isn't
88  * IPC_PRIVATE, the GET will attempt to look up the specified key and
89  * either return that or create a new key depending on the state of the
90  * IPC_CREAT and IPC_EXCL flags, much like open.  If GET needs to
91  * allocate an object, it can fail if there is insufficient space in
92  * the namespace (the maximum number of ids for the facility has been
93  * exceeded) or if the facility-specific initialization fails.  If GET
94  * finds an object it can return, it can still fail if that object's
95  * permissions or facility-specific attributes are less than those
96  * requested.
97  *
98  * SET is used to adjust facility-specific parameters of an object, in
99  * addition to the owner uid and gid, and mode bits.  It can fail if
100  * the caller isn't the creator or owner.
101  *
102  * STAT is used to obtain information about an object including the
103  * general object attributes described above as well as facility-specific
104  * information.  It can fail if the caller doesn't have read
105  * permission.
106  *
107  * RMID removes an object from the namespace.  Subsequent operations
108  * using the object's ID or key will fail (until another object is
109  * created with the same key or ID).  Since an RMID may be performed
110  * asynchronously with other operations, it is possible that other
111  * threads and/or processes will have references to the object.  While
112  * a facility may have actions which need to be performed at RMID time,
113  * only when all references are dropped can the object be destroyed.
114  * RMID will fail if the caller isn't the creator or owner.
115  *
116  * IDS obtains a list of all IDs in a facility's namespace.  There are
117  * no facility-specific behaviors of IDS.
118  *
119  * Design
120  * ------
121  *
122  * Because some IPC facilities provide services whose operations must
123  * scale, a mechanism which allows fast, concurrent access to
124  * individual objects is needed.  Of primary importance is object
125  * lookup based on ID (SET, STAT, others).  Allocation (GET),
126  * deallocation (RMID), ID enumeration (IDS), and key lookups (GET) are
127  * lesser concerns, but should be implemented in such a way that ID
128  * lookup isn't affected (at least not in the common case).
129  *
130  * Starting from the bottom up, each object is represented by a
131  * structure, the first member of which must be a kipc_perm_t.  The
132  * kipc_perm_t contains the information described above in "Objects", a
133  * reference count (since the object may continue to exist after it has
134  * been removed from the namespace), as well as some additional
135  * metadata used to manage data structure membership.  These objects
136  * are dynamically allocated.
137  *
138  * Above the objects is a power-of-two sized table of ID slots.  Each
139  * slot contains a pointer to an object, a sequence number, and a
140  * lock.  An object's ID is a function of its slot's index in the table
141  * and its slot's sequence number.  Every time a slot is released (via
142  * RMID) its sequence number is increased.  Strictly speaking, the
143  * sequence number is unnecessary.  However, checking the sequence
144  * number after a lookup provides a certain degree of robustness
145  * against the use of stale IDs (useful since nothing else does).  When
146  * the table fills up, it is resized (see Locking, below).
147  *
148  * Of an ID's 31 bits (an ID is, as defined by the standards, a signed
149  * int) the top IPC_SEQ_BITS are used for the sequence number with the
150  * remainder holding the index into the table.  The size of the table
151  * is therefore bounded at 2 ^ (31 - IPC_SEQ_BITS) slots.
152  *
153  * Managing this table is the ipc_service structure.  It contains a
154  * pointer to the dynamically allocated ID table, a namespace-global
155  * lock, an id_space for managing the free space in the table, and
156  * sundry other metadata necessary for the maintenance of the
157  * namespace.  An AVL tree of all keyed objects in the table (sorted by
158  * key) is used for key lookups.  An unordered doubly linked list of
159  * all objects in the namespace (keyed or not) is maintained to
160  * facilitate ID enumeration.
161  *
162  * To help visualize these relationships, here's a picture of a
163  * namespace with a table of size 8 containing three objects
164  * (IPC_SEQ_BITS = 27):
165  *
166  *
167  * +-ipc_service_t--+
168  * | table          *---\
169  * | keys           *---+----------------------\
170  * | all ids        *--\|                      |
171  * |                |  ||                      |
172  * +----------------+  ||                      |
173  *                     ||                      |
174  * /-------------------/|                      |
175  * |    /---------------/                      |
176  * |    |                                      |
177  * |    v                                      |
178  * |  +-0------+-1------+-2------+-3------+-4--+---+-5------+-6------+-7------+
179  * |  | Seq=3  |        |        | Seq=1  |    :   |        |        | Seq=6  |
180  * |  |        |        |        |        |    :   |        |        |        |
181  * |  +-*------+--------+--------+-*------+----+---+--------+--------+-*------+
182  * |    |                          |           |                       |
183  * |    |                      /---/           |      /----------------/
184  * |    |                      |               |      |
185  * |    v                      v               |      v
186  * |  +-kipc_perm_t-+        +-kipc_perm_t-+   |    +-kipc_perm_t-+
187  * |  | id=0x30     |        | id=0x13     |   |    | id=0x67     |
188  * |  | key=0xfeed  |        | key=0xbeef  |   |    | key=0xcafe  |
189  * \->| [list]      |<------>| [list]      |<------>| [list]      |
190  * /->| [avl left]  x   /--->| [avl left]  x   \--->| [avl left]  *---\
191  * |  | [avl right] x   |    | [avl right] x        | [avl right] *---+-\
192  * |  |             |   |    |             |        |             |   | |
193  * |  +-------------+   |    +-------------+        +-------------+   | |
194  * |                    \---------------------------------------------/ |
195  * \--------------------------------------------------------------------/
196  *
197  * Locking
198  * -------
199  *
200  * There are three locks (or sets of locks) which are used to ensure
201  * correctness: the slot locks, the namespace lock, and p_lock (needed
202  * when checking resource controls).  Their ordering is
203  *
204  *   namespace lock -> slot lock 0 -> ... -> slot lock t -> p_lock
205  *
206  * Generally speaking, the namespace lock is used to protect allocation
207  * and removal from the namespace, ID enumeration, and resizing the ID
208  * table.  Specifically:
209  *
210  * - write access to all fields of the ipc_service structure
211  * - read access to all variable fields of ipc_service except
212  *   ipcs_tabsz (table size) and ipcs_table (the table pointer)
213  * - read/write access to ipc_avl, ipc_list in visible objects'
214  *   kipc_perm structures (i.e. objects which have been removed from
215  *   the namespace don't have this restriction)
216  * - write access to ipct_seq and ipct_data in the table entries
217  *
218  * A slot lock by itself is meaningless (except when resizing).  Of
219  * greater interest conceptually is the notion of an ID lock -- a
220  * "virtual lock" which refers to whichever slot lock an object's ID
221  * currently hashes to.
222  *
223  * An ID lock protects all objects with that ID.  Normally there will
224  * only be one such object: the one pointed to by the locked slot.
225  * However, if an object is removed from the namespace but retains
226  * references (e.g. an attached shared memory segment which has been
227  * RMIDed), it continues to use the lock associated with its original
228  * ID.  While this can result in increased contention, operations which
229  * require taking the ID lock of removed objects are infrequent.
230  *
231  * Specifically, an ID lock protects the contents of an object's
232  * structure, including the contents of the embedded kipc_perm
233  * structure (but excluding those fields protected by the namespace
234  * lock).  It also protects the ipct_seq and ipct_data fields in its
235  * slot (it is really a slot lock, after all).
236  *
237  * Recall that the table is resizable.  To avoid requiring every ID
238  * lookup to take a global lock, a scheme much like that employed for
239  * file descriptors (see the comment above UF_ENTER in user.h) is
240  * used.  Note that the sequence number and data pointer are protected
241  * by both the namespace lock and their slot lock.  When the table is
242  * resized, the following operations take place:
243  *
244  *   1) A new table is allocated.
245  *   2) The global lock is taken.
246  *   3) All old slots are locked, in order.
247  *   4) The first half of the new slots are locked.
248  *   5) All table entries are copied to the new table, and cleared from
249  *	the old table.
250  *   6) The ipc_service structure is updated to point to the new table.
251  *   7) The ipc_service structure is updated with the new table size.
252  *   8) All slot locks (old and new) are dropped.
253  *
254  * Because the slot locks are embedded in the table, ID lookups and
255  * other operations which require taking a slot lock need to verify
256  * that the lock taken wasn't part of a stale table.  This is
257  * accomplished by checking the table size before and after
258  * dereferencing the table pointer and taking the lock: if the size
259  * changes, the lock must be dropped and reacquired.  It is this
260  * additional work which distinguishes an ID lock from a slot lock.
261  *
262  * Because we can't guarantee that threads aren't accessing the old
263  * tables' locks, they are never deallocated.  To prevent spurious
264  * reports of memory leaks, a pointer to the discarded table is stored
265  * in the new one in step 5.  (Theoretically ipcs_destroy will delete
266  * the discarded tables, but it is only ever called from a failed _init
267  * invocation; i.e. when there aren't any.)
268  *
269  * Interfaces
270  * ----------
271  *
272  * The following interfaces are provided by the ipc module for use by
273  * the individual IPC facilities:
274  *
275  * ipcperm_access
276  *
277  *   Given an object and a cred structure, determines if the requested
278  *   access type is allowed.
279  *
280  * ipcperm_set, ipcperm_stat,
281  * ipcperm_set64, ipcperm_stat64
282  *
283  *   Performs the common portion of a STAT or SET operation.  All
284  *   (except stat and stat64) can fail, so they should be called before
285  *   any facility-specific non-reversible changes are made to an
286  *   object.  Similarly, the set operations have side effects, so they
287  *   should only be called once the possibility of a facility-specific
288  *   failure is eliminated.
289  *
290  * ipcs_create
291  *
292  *   Creates an IPC namespace for use by an IPC facility.
293  *
294  * ipcs_destroy
295  *
296  *   Destroys an IPC namespace.
297  *
298  * ipcs_lock, ipcs_unlock
299  *
300  *   Takes the namespace lock.  Ideally such access wouldn't be
301  *   necessary, but there may be facility-specific data protected by
302  *   this lock (e.g. project-wide resource consumption).
303  *
304  * ipc_lock
305  *
306  *   Takes the lock associated with an ID.  Can't fail.
307  *
308  * ipc_relock
309  *
310  *   Like ipc_lock, but takes a pointer to a held lock.  Drops the lock
311  *   unless it is the one that would have been returned by ipc_lock.
312  *   Used after calls to cv_wait.
313  *
314  * ipc_lookup
315  *
316  *   Performs an ID lookup, returns with the ID lock held.  Fails if
317  *   the ID doesn't exist in the namespace.
318  *
319  * ipc_hold
320  *
321  *   Takes a reference on an object.
322  *
323  * ipc_rele
324  *
325  *   Releases a reference on an object, and drops the object's lock.
326  *   Calls the object's destructor if last reference is being
327  *   released.
328  *
329  * ipc_rele_locked
330  *
331  *   Releases a reference on an object.  Doesn't drop lock, and may
332  *   only be called when there is more than one reference to the
333  *   object.
334  *
335  * ipc_get, ipc_commit_begin, ipc_commit_end, ipc_cleanup
336  *
337  *   Components of a GET operation.  ipc_get performs a key lookup,
338  *   allocating an object if the key isn't found (returning with the
339  *   namespace lock and p_lock held), and returning the existing object
340  *   if it is (with the object lock held).  ipc_get doesn't modify the
341  *   namespace.
342  *
343  *   ipc_commit_begin begins the process of inserting an object
344  *   allocated by ipc_get into the namespace, and can fail.  If
345  *   successful, it returns with the namespace lock and p_lock held.
346  *   ipc_commit_end completes the process of inserting an object into
347  *   the namespace and can't fail.  The facility can call ipc_cleanup
348  *   at any time following a successful ipc_get and before
349  *   ipc_commit_end or a failed ipc_commit_begin to fail the
350  *   allocation.  Pseudocode for the suggested GET implementation:
351  *
352  *   top:
353  *
354  *     ipc_get
355  *
356  *     if failure
357  *       return
358  *
359  *     if found {
360  *
361  *	 if object meets criteria
362  *	   unlock object and return success
363  *       else
364  *	   unlock object and return failure
365  *
366  *     } else {
367  *
368  *	 perform resource control tests
369  *	 drop namespace lock, p_lock
370  *	 if failure
371  *	   ipc_cleanup
372  *
373  *       perform facility-specific initialization
374  *	 if failure {
375  *	   facility-specific cleanup
376  *	   ipc_cleanup
377  *       }
378  *
379  *	 ( At this point the object should be destructible using the
380  *	   destructor given to ipcs_create )
381  *
382  *       ipc_commit_begin
383  *	 if retry
384  *	   goto top
385  *       else if failure
386  *         return
387  *
388  *       perform facility-specific resource control tests/allocations
389  *	 if failure
390  *	   ipc_cleanup
391  *
392  *	 ipc_commit_end
393  *	 perform any infallible post-creation actions, unlock, and return
394  *
395  *     }
396  *
397  * ipc_rmid
398  *
399  *   Performs the common portion of an RMID operation -- looks up an ID,
400  *   removes it, and calls a facility-specific function to do
401  *   RMID-time cleanup on the private portions of the object.
402  *
403  * ipc_ids
404  *
405  *   Performs the common portion of an IDS operation.
406  *
407  */
408 
409 #include <sys/types.h>
410 #include <sys/param.h>
411 #include <sys/cred.h>
412 #include <sys/policy.h>
413 #include <sys/proc.h>
414 #include <sys/user.h>
415 #include <sys/ipc.h>
416 #include <sys/ipc_impl.h>
417 #include <sys/errno.h>
418 #include <sys/systm.h>
419 #include <sys/list.h>
420 #include <sys/atomic.h>
421 #include <sys/zone.h>
422 #include <sys/task.h>
423 #include <sys/modctl.h>
424 
425 #include <c2/audit.h>
426 
427 static struct modlmisc modlmisc = {
428 	&mod_miscops,
429 	"common ipc code",
430 };
431 
432 static struct modlinkage modlinkage = {
433 	MODREV_1, (void *)&modlmisc, NULL
434 };
435 
436 
437 int
438 _init(void)
439 {
440 	return (mod_install(&modlinkage));
441 }
442 
443 int
444 _fini(void)
445 {
446 	return (mod_remove(&modlinkage));
447 }
448 
449 int
450 _info(struct modinfo *modinfop)
451 {
452 	return (mod_info(&modlinkage, modinfop));
453 }
454 
455 
456 /*
457  * Check message, semaphore, or shared memory access permissions.
458  *
459  * This routine verifies the requested access permission for the current
460  * process.  The zone ids are compared, and the appropriate bits are
461  * checked corresponding to owner, group (including the list of
462  * supplementary groups), or everyone.  Zero is returned on success.
463  * On failure, the security policy is asked to check to override the
464  * permissions check; the policy will either return 0 for access granted
465  * or EACCES.
466  *
467  * Access to objects in other zones requires that the caller be in the
468  * global zone and have the appropriate IPC_DAC_* privilege, regardless
469  * of whether the uid or gid match those of the object.  Note that
470  * cross-zone accesses will normally never get here since they'll
471  * fail in ipc_lookup or ipc_get.
472  *
473  * The arguments must be set up as follows:
474  * 	p - Pointer to permission structure to verify
475  * 	mode - Desired access permissions
476  */
477 int
478 ipcperm_access(kipc_perm_t *p, int mode, cred_t *cr)
479 {
480 	int shifts = 0;
481 	uid_t uid = crgetuid(cr);
482 	zoneid_t zoneid = getzoneid();
483 
484 	if (p->ipc_zoneid == zoneid) {
485 		if (uid != p->ipc_uid && uid != p->ipc_cuid) {
486 			shifts += 3;
487 			if (!groupmember(p->ipc_gid, cr) &&
488 			    !groupmember(p->ipc_cgid, cr))
489 				shifts += 3;
490 		}
491 
492 		mode &= ~(p->ipc_mode << shifts);
493 
494 		if (mode == 0)
495 			return (0);
496 	} else if (zoneid != GLOBAL_ZONEID)
497 		return (EACCES);
498 
499 	return (secpolicy_ipc_access(cr, p, mode));
500 }
501 
502 /*
503  * There are two versions of the ipcperm_set/stat functions:
504  *   ipcperm_???        - for use with IPC_SET/STAT
505  *   ipcperm_???64      - for use with IPC_SET64/STAT64
506  *
507  * These functions encapsulate the common portions (copying, permission
508  * checks, and auditing) of the set/stat operations.  All, except for
509  * stat and stat64 which are void, return 0 on success or a non-zero
510  * errno value on error.
511  */
512 
513 int
514 ipcperm_set(ipc_service_t *service, struct cred *cr,
515     kipc_perm_t *kperm, struct ipc_perm *perm, model_t model)
516 {
517 	STRUCT_HANDLE(ipc_perm, lperm);
518 	uid_t uid;
519 	gid_t gid;
520 	mode_t mode;
521 
522 	ASSERT(IPC_LOCKED(service, kperm));
523 
524 	STRUCT_SET_HANDLE(lperm, model, perm);
525 	uid = STRUCT_FGET(lperm, uid);
526 	gid = STRUCT_FGET(lperm, gid);
527 	mode = STRUCT_FGET(lperm, mode);
528 
529 	if (secpolicy_ipc_owner(cr, kperm) != 0)
530 		return (EPERM);
531 
532 	if ((uid < 0) || (uid > MAXUID) || (gid < 0) || (gid > MAXUID))
533 		return (EINVAL);
534 
535 	kperm->ipc_uid = uid;
536 	kperm->ipc_gid = gid;
537 	kperm->ipc_mode = (mode & 0777) | (kperm->ipc_mode & ~0777);
538 
539 #ifdef C2_AUDIT
540 	if (audit_active)
541 		audit_ipcget(service->ipcs_atype, kperm);
542 #endif
543 
544 	return (0);
545 }
546 
547 void
548 ipcperm_stat(struct ipc_perm *perm, kipc_perm_t *kperm, model_t model)
549 {
550 	STRUCT_HANDLE(ipc_perm, lperm);
551 
552 	STRUCT_SET_HANDLE(lperm, model, perm);
553 	STRUCT_FSET(lperm, uid, kperm->ipc_uid);
554 	STRUCT_FSET(lperm, gid, kperm->ipc_gid);
555 	STRUCT_FSET(lperm, cuid, kperm->ipc_cuid);
556 	STRUCT_FSET(lperm, cgid, kperm->ipc_cgid);
557 	STRUCT_FSET(lperm, mode, kperm->ipc_mode);
558 	STRUCT_FSET(lperm, seq, 0);
559 	STRUCT_FSET(lperm, key, kperm->ipc_key);
560 }
561 
562 int
563 ipcperm_set64(ipc_service_t *service, struct cred *cr,
564     kipc_perm_t *kperm, ipc_perm64_t *perm64)
565 {
566 	ASSERT(IPC_LOCKED(service, kperm));
567 
568 	if (secpolicy_ipc_owner(cr, kperm) != 0)
569 		return (EPERM);
570 
571 	if ((perm64->ipcx_uid < 0) || (perm64->ipcx_uid > MAXUID) ||
572 	    (perm64->ipcx_gid < 0) || (perm64->ipcx_gid > MAXUID))
573 		return (EINVAL);
574 
575 	kperm->ipc_uid = perm64->ipcx_uid;
576 	kperm->ipc_gid = perm64->ipcx_gid;
577 	kperm->ipc_mode = (perm64->ipcx_mode & 0777) |
578 	    (kperm->ipc_mode & ~0777);
579 
580 #ifdef C2_AUDIT
581 	if (audit_active)
582 		audit_ipcget(service->ipcs_atype, kperm);
583 #endif
584 
585 	return (0);
586 }
587 
588 void
589 ipcperm_stat64(ipc_perm64_t *perm64, kipc_perm_t *kperm)
590 {
591 	perm64->ipcx_uid = kperm->ipc_uid;
592 	perm64->ipcx_gid = kperm->ipc_gid;
593 	perm64->ipcx_cuid = kperm->ipc_cuid;
594 	perm64->ipcx_cgid = kperm->ipc_cgid;
595 	perm64->ipcx_mode = kperm->ipc_mode;
596 	perm64->ipcx_key = kperm->ipc_key;
597 	perm64->ipcx_projid = kperm->ipc_proj->kpj_id;
598 	perm64->ipcx_zoneid = kperm->ipc_zoneid;
599 }
600 
601 
602 /*
603  * ipc key comparator.
604  */
605 static int
606 ipc_key_compar(const void *a, const void *b)
607 {
608 	kipc_perm_t *aperm = (kipc_perm_t *)a;
609 	kipc_perm_t *bperm = (kipc_perm_t *)b;
610 	int ak = aperm->ipc_key;
611 	int bk = bperm->ipc_key;
612 	zoneid_t az;
613 	zoneid_t bz;
614 
615 	ASSERT(ak != IPC_PRIVATE);
616 	ASSERT(bk != IPC_PRIVATE);
617 
618 	/*
619 	 * Compare key first, then zoneid.  This optimizes performance for
620 	 * systems with only one zone, since the zone checks will only be
621 	 * made when the keys match.
622 	 */
623 	if (ak < bk)
624 		return (-1);
625 	if (ak > bk)
626 		return (1);
627 
628 	/* keys match */
629 	az = aperm->ipc_zoneid;
630 	bz = bperm->ipc_zoneid;
631 	if (az < bz)
632 		return (-1);
633 	if (az > bz)
634 		return (1);
635 	return (0);
636 }
637 
638 /*
639  * Create an ipc service.
640  */
641 ipc_service_t *
642 ipcs_create(const char *name, rctl_hndl_t proj_rctl, rctl_hndl_t zone_rctl,
643     size_t size, ipc_func_t *dtor, ipc_func_t *rmid, int audit_type,
644     size_t rctl_offset)
645 {
646 	ipc_service_t *result;
647 
648 	result = kmem_alloc(sizeof (ipc_service_t), KM_SLEEP);
649 
650 	mutex_init(&result->ipcs_lock, NULL, MUTEX_ADAPTIVE, NULL);
651 	result->ipcs_count = 0;
652 	avl_create(&result->ipcs_keys, ipc_key_compar, size, 0);
653 	result->ipcs_tabsz = IPC_IDS_MIN;
654 	result->ipcs_table =
655 	    kmem_zalloc(IPC_IDS_MIN * sizeof (ipc_slot_t), KM_SLEEP);
656 	result->ipcs_ssize = size;
657 	result->ipcs_ids = id_space_create(name, 0, IPC_IDS_MIN);
658 	result->ipcs_dtor = dtor;
659 	result->ipcs_rmid = rmid;
660 	result->ipcs_proj_rctl = proj_rctl;
661 	result->ipcs_zone_rctl = zone_rctl;
662 	result->ipcs_atype = audit_type;
663 	ASSERT(rctl_offset < sizeof (ipc_rqty_t));
664 	result->ipcs_rctlofs = rctl_offset;
665 	list_create(&result->ipcs_usedids, sizeof (kipc_perm_t),
666 	    offsetof(kipc_perm_t, ipc_list));
667 
668 	return (result);
669 }
670 
671 /*
672  * Destroy an ipc service.
673  */
674 void
675 ipcs_destroy(ipc_service_t *service)
676 {
677 	ipc_slot_t *slot, *next;
678 
679 	mutex_enter(&service->ipcs_lock);
680 
681 	ASSERT(service->ipcs_count == 0);
682 	avl_destroy(&service->ipcs_keys);
683 	list_destroy(&service->ipcs_usedids);
684 	id_space_destroy(service->ipcs_ids);
685 
686 	for (slot = service->ipcs_table; slot; slot = next) {
687 		next = slot[0].ipct_chain;
688 		kmem_free(slot, service->ipcs_tabsz * sizeof (ipc_slot_t));
689 		service->ipcs_tabsz >>= 1;
690 	}
691 
692 	mutex_destroy(&service->ipcs_lock);
693 	kmem_free(service, sizeof (ipc_service_t));
694 }
695 
696 /*
697  * Takes the service lock.
698  */
699 void
700 ipcs_lock(ipc_service_t *service)
701 {
702 	mutex_enter(&service->ipcs_lock);
703 }
704 
705 /*
706  * Releases the service lock.
707  */
708 void
709 ipcs_unlock(ipc_service_t *service)
710 {
711 	mutex_exit(&service->ipcs_lock);
712 }
713 
714 
715 /*
716  * Locks the specified ID.  Returns the ID's ID table index.
717  */
718 static int
719 ipc_lock_internal(ipc_service_t *service, uint_t id)
720 {
721 	uint_t	tabsz;
722 	uint_t	index;
723 	kmutex_t *mutex;
724 
725 	for (;;) {
726 		tabsz = service->ipcs_tabsz;
727 		membar_consumer();
728 		index = id & (tabsz - 1);
729 		mutex = &service->ipcs_table[index].ipct_lock;
730 		mutex_enter(mutex);
731 		if (tabsz == service->ipcs_tabsz)
732 			break;
733 		mutex_exit(mutex);
734 	}
735 
736 	return (index);
737 }
738 
739 /*
740  * Locks the specified ID.  Returns a pointer to the ID's lock.
741  */
742 kmutex_t *
743 ipc_lock(ipc_service_t *service, int id)
744 {
745 	uint_t index;
746 
747 	/*
748 	 * These assertions don't reflect requirements of the code
749 	 * which follows, but they should never fail nonetheless.
750 	 */
751 	ASSERT(id >= 0);
752 	ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
753 	index = ipc_lock_internal(service, id);
754 
755 	return (&service->ipcs_table[index].ipct_lock);
756 }
757 
758 /*
759  * Checks to see if the held lock provided is the current lock for the
760  * specified id.  If so, we return it instead of dropping it and
761  * returning the result of ipc_lock.  This is intended to speed up cv
762  * wakeups where we are left holding a lock which could be stale, but
763  * probably isn't.
764  */
765 kmutex_t *
766 ipc_relock(ipc_service_t *service, int id, kmutex_t *lock)
767 {
768 	ASSERT(id >= 0);
769 	ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
770 	ASSERT(MUTEX_HELD(lock));
771 
772 	if (&service->ipcs_table[IPC_INDEX(id)].ipct_lock == lock)
773 		return (lock);
774 
775 	mutex_exit(lock);
776 	return (ipc_lock(service, id));
777 }
778 
779 /*
780  * Performs an ID lookup.  If the ID doesn't exist or has been removed,
781  * or isn't visible to the caller (because of zones), NULL is returned.
782  * Otherwise, a pointer to the ID's perm structure and held ID lock are
783  * returned.
784  */
785 kmutex_t *
786 ipc_lookup(ipc_service_t *service, int id, kipc_perm_t **perm)
787 {
788 	kipc_perm_t *result;
789 	uint_t index;
790 
791 	/*
792 	 * There is no need to check to see if id is in-range (i.e.
793 	 * positive and fits into the table).  If it is out-of-range,
794 	 * the id simply won't match the object's.
795 	 */
796 
797 	index = ipc_lock_internal(service, id);
798 	result = service->ipcs_table[index].ipct_data;
799 	if (result == NULL || result->ipc_id != (uint_t)id ||
800 	    !HASZONEACCESS(curproc, result->ipc_zoneid)) {
801 		mutex_exit(&service->ipcs_table[index].ipct_lock);
802 		return (NULL);
803 	}
804 
805 	ASSERT(IPC_SEQ(id) == service->ipcs_table[index].ipct_seq);
806 
807 	*perm = result;
808 #ifdef C2_AUDIT
809 	if (audit_active)
810 		audit_ipc(service->ipcs_atype, id, result);
811 #endif
812 
813 	return (&service->ipcs_table[index].ipct_lock);
814 }
815 
816 /*
817  * Increase the reference count on an ID.
818  */
819 /*ARGSUSED*/
820 void
821 ipc_hold(ipc_service_t *s, kipc_perm_t *perm)
822 {
823 	ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
824 	ASSERT(IPC_LOCKED(s, perm));
825 	perm->ipc_ref++;
826 }
827 
828 /*
829  * Decrease the reference count on an ID and drops the ID's lock.
830  * Destroys the ID if the new reference count is zero.
831  */
832 void
833 ipc_rele(ipc_service_t *s, kipc_perm_t *perm)
834 {
835 	int nref;
836 
837 	ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
838 	ASSERT(IPC_LOCKED(s, perm));
839 	ASSERT(perm->ipc_ref > 0);
840 
841 	nref = --perm->ipc_ref;
842 	mutex_exit(&s->ipcs_table[IPC_INDEX(perm->ipc_id)].ipct_lock);
843 
844 	if (nref == 0) {
845 		ASSERT(IPC_FREE(perm));		/* ipc_rmid clears IPC_ALLOC */
846 		s->ipcs_dtor(perm);
847 		project_rele(perm->ipc_proj);
848 		zone_rele(perm->ipc_zone);
849 		kmem_free(perm, s->ipcs_ssize);
850 	}
851 }
852 
853 /*
854  * Decrease the reference count on an ID, but don't drop the ID lock.
855  * Used in cases where one thread needs to remove many references (on
856  * behalf of other parties).
857  */
858 void
859 ipc_rele_locked(ipc_service_t *s, kipc_perm_t *perm)
860 {
861 	ASSERT(perm->ipc_ref > 1);
862 	ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
863 	ASSERT(IPC_LOCKED(s, perm));
864 
865 	perm->ipc_ref--;
866 }
867 
868 
869 /*
870  * Internal function to grow the service ID table.
871  */
872 static int
873 ipc_grow(ipc_service_t *service)
874 {
875 	ipc_slot_t *new, *old;
876 	int i, oldsize, newsize;
877 
878 	ASSERT(MUTEX_HELD(&service->ipcs_lock));
879 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
880 
881 	if (service->ipcs_tabsz == IPC_IDS_MAX)
882 		return (ENOSPC);
883 
884 	oldsize = service->ipcs_tabsz;
885 	newsize = oldsize << 1;
886 	new = kmem_zalloc(newsize * sizeof (ipc_slot_t), KM_NOSLEEP);
887 	if (new == NULL)
888 		return (ENOSPC);
889 
890 	old = service->ipcs_table;
891 	for (i = 0; i < oldsize; i++) {
892 		mutex_enter(&old[i].ipct_lock);
893 		mutex_enter(&new[i].ipct_lock);
894 
895 		new[i].ipct_seq = old[i].ipct_seq;
896 		new[i].ipct_data = old[i].ipct_data;
897 		old[i].ipct_data = NULL;
898 	}
899 
900 	new[0].ipct_chain = old;
901 	service->ipcs_table = new;
902 	membar_producer();
903 	service->ipcs_tabsz = newsize;
904 
905 	for (i = 0; i < oldsize; i++) {
906 		mutex_exit(&old[i].ipct_lock);
907 		mutex_exit(&new[i].ipct_lock);
908 	}
909 
910 	id_space_extend(service->ipcs_ids, oldsize, service->ipcs_tabsz);
911 
912 	return (0);
913 }
914 
915 
916 static int
917 ipc_keylookup(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp)
918 {
919 	kipc_perm_t *perm = NULL;
920 	avl_index_t where;
921 	kipc_perm_t template;
922 
923 	ASSERT(MUTEX_HELD(&service->ipcs_lock));
924 
925 	template.ipc_key = key;
926 	template.ipc_zoneid = getzoneid();
927 	if (perm = avl_find(&service->ipcs_keys, &template, &where)) {
928 		ASSERT(!IPC_FREE(perm));
929 		if ((flag & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
930 			return (EEXIST);
931 		if ((flag & 0777) & ~perm->ipc_mode) {
932 #ifdef C2_AUDIT
933 			if (audit_active)
934 				audit_ipcget(NULL, (void *)perm);
935 #endif
936 			return (EACCES);
937 		}
938 		*permp = perm;
939 		return (0);
940 	} else if (flag & IPC_CREAT) {
941 		*permp = NULL;
942 		return (0);
943 	}
944 	return (ENOENT);
945 }
946 
947 static int
948 ipc_alloc_test(ipc_service_t *service, proc_t *pp)
949 {
950 	ASSERT(MUTEX_HELD(&service->ipcs_lock));
951 
952 	/*
953 	 * Resizing the table first would result in a cleaner code
954 	 * path, but would also allow a user to (permanently) double
955 	 * the id table size in cases where the allocation would be
956 	 * denied.  Hence we test the rctl first.
957 	 */
958 retry:
959 	mutex_enter(&pp->p_lock);
960 	if ((rctl_test(service->ipcs_proj_rctl, pp->p_task->tk_proj->kpj_rctls,
961 	    pp, 1, RCA_SAFE) & RCT_DENY) ||
962 	    (rctl_test(service->ipcs_zone_rctl, pp->p_zone->zone_rctls,
963 	    pp, 1, RCA_SAFE) & RCT_DENY)) {
964 		mutex_exit(&pp->p_lock);
965 		return (ENOSPC);
966 	}
967 
968 	if (service->ipcs_count == service->ipcs_tabsz) {
969 		int error;
970 
971 		mutex_exit(&pp->p_lock);
972 		if (error = ipc_grow(service))
973 			return (error);
974 		goto retry;
975 	}
976 
977 	return (0);
978 }
979 
980 /*
981  * Given a key, search for or create the associated identifier.
982  *
983  * If IPC_CREAT is specified and the key isn't found, or if the key is
984  * equal to IPC_PRIVATE, we return 0 and place a pointer to a newly
985  * allocated object structure in permp.  A pointer to the held service
986  * lock is placed in lockp.  ipc_mode's IPC_ALLOC bit is clear.
987  *
988  * If the key is found and no error conditions arise, we return 0 and
989  * place a pointer to the existing object structure in permp.  A
990  * pointer to the held ID lock is placed in lockp.  ipc_mode's
991  * IPC_ALLOC bit is set.
992  *
993  * Otherwise, a non-zero errno value is returned.
994  */
995 int
996 ipc_get(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp,
997     kmutex_t **lockp)
998 {
999 	kipc_perm_t	*perm = NULL;
1000 	proc_t		*pp = curproc;
1001 	int		error, index;
1002 	cred_t		*cr = CRED();
1003 
1004 	if (key != IPC_PRIVATE) {
1005 
1006 		mutex_enter(&service->ipcs_lock);
1007 		error = ipc_keylookup(service, key, flag, &perm);
1008 		if (perm != NULL)
1009 			index = ipc_lock_internal(service, perm->ipc_id);
1010 		mutex_exit(&service->ipcs_lock);
1011 
1012 		if (error) {
1013 			ASSERT(perm == NULL);
1014 			return (error);
1015 		}
1016 
1017 		if (perm) {
1018 			ASSERT(!IPC_FREE(perm));
1019 			*permp = perm;
1020 			*lockp = &service->ipcs_table[index].ipct_lock;
1021 			return (0);
1022 		}
1023 
1024 		/* Key not found; fall through */
1025 	}
1026 
1027 	perm = kmem_zalloc(service->ipcs_ssize, KM_SLEEP);
1028 
1029 	mutex_enter(&service->ipcs_lock);
1030 	if (error = ipc_alloc_test(service, pp)) {
1031 		mutex_exit(&service->ipcs_lock);
1032 		kmem_free(perm, service->ipcs_ssize);
1033 		return (error);
1034 	}
1035 
1036 	perm->ipc_cuid = perm->ipc_uid = crgetuid(cr);
1037 	perm->ipc_cgid = perm->ipc_gid = crgetgid(cr);
1038 	perm->ipc_zoneid = getzoneid();
1039 	perm->ipc_mode = flag & 0777;
1040 	perm->ipc_key = key;
1041 	perm->ipc_ref = 1;
1042 	perm->ipc_id = IPC_ID_INVAL;
1043 	*permp = perm;
1044 	*lockp = &service->ipcs_lock;
1045 
1046 	return (0);
1047 }
1048 
1049 /*
1050  * Attempts to add the a newly created ID to the global namespace.  If
1051  * creating it would cause an error, we return the error.  If there is
1052  * the possibility that we could obtain the existing ID and return it
1053  * to the user, we return EAGAIN.  Otherwise, we return 0 with p_lock
1054  * and the service lock held.
1055  *
1056  * Since this should be only called after all initialization has been
1057  * completed, on failure we automatically invoke the destructor for the
1058  * object and deallocate the memory associated with it.
1059  */
1060 int
1061 ipc_commit_begin(ipc_service_t *service, key_t key, int flag,
1062     kipc_perm_t *newperm)
1063 {
1064 	kipc_perm_t *perm;
1065 	int error;
1066 	proc_t *pp = curproc;
1067 
1068 	ASSERT(newperm->ipc_ref == 1);
1069 	ASSERT(IPC_FREE(newperm));
1070 
1071 	mutex_enter(&service->ipcs_lock);
1072 	/*
1073 	 * Ensure that no-one has raced with us and created the key.
1074 	 */
1075 	if ((key != IPC_PRIVATE) &&
1076 	    (((error = ipc_keylookup(service, key, flag, &perm)) != 0) ||
1077 	    (perm != NULL))) {
1078 		error = error ? error : EAGAIN;
1079 		goto errout;
1080 	}
1081 
1082 	/*
1083 	 * Ensure that no-one has raced with us and used the last of
1084 	 * the permissible ids, or the last of the free spaces in the
1085 	 * id table.
1086 	 */
1087 	if (error = ipc_alloc_test(service, pp))
1088 		goto errout;
1089 
1090 	/*
1091 	 * Set ipc_proj so ipc_cleanup cleans up necessary state.
1092 	 */
1093 	newperm->ipc_proj = pp->p_task->tk_proj;
1094 	newperm->ipc_zone = pp->p_zone;
1095 
1096 	ASSERT(MUTEX_HELD(&service->ipcs_lock));
1097 	ASSERT(MUTEX_HELD(&pp->p_lock));
1098 
1099 	return (0);
1100 errout:
1101 	mutex_exit(&service->ipcs_lock);
1102 	service->ipcs_dtor(newperm);
1103 	kmem_free(newperm, service->ipcs_ssize);
1104 	return (error);
1105 }
1106 
1107 /*
1108  * Commit the ID allocation transaction.  Called with p_lock and the
1109  * service lock held, both of which are dropped.  Returns the held ID
1110  * lock so the caller can extract the ID and perform ipcget auditing.
1111  */
1112 kmutex_t *
1113 ipc_commit_end(ipc_service_t *service, kipc_perm_t *perm)
1114 {
1115 	ipc_slot_t *slot;
1116 	avl_index_t where;
1117 	int index;
1118 	void *loc;
1119 
1120 	ASSERT(MUTEX_HELD(&service->ipcs_lock));
1121 	ASSERT(MUTEX_HELD(&curproc->p_lock));
1122 
1123 	(void) project_hold(perm->ipc_proj);
1124 	(void) zone_hold(perm->ipc_zone);
1125 	mutex_exit(&curproc->p_lock);
1126 
1127 	/*
1128 	 * Pick out our slot.
1129 	 */
1130 	service->ipcs_count++;
1131 	index = id_alloc(service->ipcs_ids);
1132 	ASSERT(index < service->ipcs_tabsz);
1133 	slot = &service->ipcs_table[index];
1134 	mutex_enter(&slot->ipct_lock);
1135 	ASSERT(slot->ipct_data == NULL);
1136 
1137 	/*
1138 	 * Update the perm structure.
1139 	 */
1140 	perm->ipc_mode |= IPC_ALLOC;
1141 	perm->ipc_id = (slot->ipct_seq << IPC_SEQ_SHIFT) | index;
1142 
1143 	/*
1144 	 * Push into global visibility.
1145 	 */
1146 	slot->ipct_data = perm;
1147 	if (perm->ipc_key != IPC_PRIVATE) {
1148 		loc = avl_find(&service->ipcs_keys, perm, &where);
1149 		ASSERT(loc == NULL);
1150 		avl_insert(&service->ipcs_keys, perm, where);
1151 	}
1152 	list_insert_head(&service->ipcs_usedids, perm);
1153 
1154 	/*
1155 	 * Update resource consumption.
1156 	 */
1157 	IPC_PROJ_USAGE(perm, service) += 1;
1158 	IPC_ZONE_USAGE(perm, service) += 1;
1159 
1160 	mutex_exit(&service->ipcs_lock);
1161 	return (&slot->ipct_lock);
1162 }
1163 
1164 /*
1165  * Clean up function, in case the allocation fails.  If called between
1166  * ipc_lookup and ipc_commit_begin, perm->ipc_proj will be 0 and we
1167  * merely free the perm structure.  If called after ipc_commit_begin,
1168  * we also drop locks and call the ID's destructor.
1169  */
1170 void
1171 ipc_cleanup(ipc_service_t *service, kipc_perm_t *perm)
1172 {
1173 	ASSERT(IPC_FREE(perm));
1174 	if (perm->ipc_proj) {
1175 		mutex_exit(&curproc->p_lock);
1176 		mutex_exit(&service->ipcs_lock);
1177 		service->ipcs_dtor(perm);
1178 	}
1179 	kmem_free(perm, service->ipcs_ssize);
1180 }
1181 
1182 
1183 /*
1184  * Common code to remove an IPC object.  This should be called after
1185  * all permissions checks have been performed, and with the service
1186  * and ID locked.  Note that this does not remove the object from
1187  * the ipcs_usedids list (this needs to be done by the caller before
1188  * dropping the service lock).
1189  */
1190 static void
1191 ipc_remove(ipc_service_t *service, kipc_perm_t *perm)
1192 {
1193 	int id = perm->ipc_id;
1194 	int index;
1195 
1196 	ASSERT(MUTEX_HELD(&service->ipcs_lock));
1197 	ASSERT(IPC_LOCKED(service, perm));
1198 
1199 	index = IPC_INDEX(id);
1200 
1201 	service->ipcs_table[index].ipct_data = NULL;
1202 
1203 	if (perm->ipc_key != IPC_PRIVATE)
1204 		avl_remove(&service->ipcs_keys, perm);
1205 	list_remove(&service->ipcs_usedids, perm);
1206 	perm->ipc_mode &= ~IPC_ALLOC;
1207 
1208 	id_free(service->ipcs_ids, index);
1209 
1210 	if (service->ipcs_table[index].ipct_seq++ == IPC_SEQ_MASK)
1211 		service->ipcs_table[index].ipct_seq = 0;
1212 	service->ipcs_count--;
1213 	ASSERT(IPC_PROJ_USAGE(perm, service) > 0);
1214 	ASSERT(IPC_ZONE_USAGE(perm, service) > 0);
1215 	IPC_PROJ_USAGE(perm, service) -= 1;
1216 	IPC_ZONE_USAGE(perm, service) -= 1;
1217 	ASSERT(service->ipcs_count || ((IPC_PROJ_USAGE(perm, service) == 0) &&
1218 	    (IPC_ZONE_USAGE(perm, service) == 0)));
1219 }
1220 
1221 
1222 /*
1223  * Common code to perform an IPC_RMID.  Returns an errno value on
1224  * failure, 0 on success.
1225  */
1226 int
1227 ipc_rmid(ipc_service_t *service, int id, cred_t *cr)
1228 {
1229 	kipc_perm_t *perm;
1230 	kmutex_t *lock;
1231 
1232 	mutex_enter(&service->ipcs_lock);
1233 
1234 	lock = ipc_lookup(service, id, &perm);
1235 	if (lock == NULL) {
1236 		mutex_exit(&service->ipcs_lock);
1237 		return (EINVAL);
1238 	}
1239 
1240 	ASSERT(service->ipcs_count > 0);
1241 
1242 	if (secpolicy_ipc_owner(cr, perm) != 0) {
1243 		mutex_exit(lock);
1244 		mutex_exit(&service->ipcs_lock);
1245 		return (EPERM);
1246 	}
1247 
1248 	/*
1249 	 * Nothing can fail from this point on.
1250 	 */
1251 	ipc_remove(service, perm);
1252 	mutex_exit(&service->ipcs_lock);
1253 
1254 	/* perform any per-service removal actions */
1255 	service->ipcs_rmid(perm);
1256 
1257 	ipc_rele(service, perm);
1258 
1259 	return (0);
1260 }
1261 
1262 /*
1263  * Implementation for shmids, semids, and msgids.  buf is the address
1264  * of the user buffer, nids is the size, and pnids is a pointer to
1265  * where we write the actual number of ids that [would] have been
1266  * copied out.
1267  */
1268 int
1269 ipc_ids(ipc_service_t *service, int *buf, uint_t nids, uint_t *pnids)
1270 {
1271 	kipc_perm_t *perm;
1272 	size_t	idsize = 0;
1273 	int	error = 0;
1274 	int	idcount;
1275 	int	*ids;
1276 	int	numids = 0;
1277 	zoneid_t zoneid = getzoneid();
1278 	int	global = INGLOBALZONE(curproc);
1279 
1280 	if (buf == NULL)
1281 		nids = 0;
1282 
1283 	/*
1284 	 * Get an accurate count of the total number of ids, and allocate a
1285 	 * staging buffer.  Since ipcs_count is always sane, we don't have
1286 	 * to take ipcs_lock for our first guess.  If there are no ids, or
1287 	 * we're in the global zone and the number of ids is greater than
1288 	 * the size of the specified buffer, we shunt to the end.  Otherwise,
1289 	 * we go through the id list looking for (and counting) what is
1290 	 * visible in the specified zone.
1291 	 */
1292 	idcount = service->ipcs_count;
1293 	for (;;) {
1294 		if ((global && idcount > nids) || idcount == 0) {
1295 			numids = idcount;
1296 			nids = 0;
1297 			goto out;
1298 		}
1299 
1300 		idsize = idcount * sizeof (int);
1301 		ids = kmem_alloc(idsize, KM_SLEEP);
1302 
1303 		mutex_enter(&service->ipcs_lock);
1304 		if (idcount >= service->ipcs_count)
1305 			break;
1306 		idcount = service->ipcs_count;
1307 		mutex_exit(&service->ipcs_lock);
1308 
1309 		if (idsize != 0) {
1310 			kmem_free(ids, idsize);
1311 			idsize = 0;
1312 		}
1313 	}
1314 
1315 	for (perm = list_head(&service->ipcs_usedids); perm != NULL;
1316 	    perm = list_next(&service->ipcs_usedids, perm)) {
1317 		ASSERT(!IPC_FREE(perm));
1318 		if (global || perm->ipc_zoneid == zoneid)
1319 			ids[numids++] = perm->ipc_id;
1320 	}
1321 	mutex_exit(&service->ipcs_lock);
1322 
1323 	/*
1324 	 * If there isn't enough space to hold all of the ids, just
1325 	 * return the number of ids without copying out any of them.
1326 	 */
1327 	if (nids < numids)
1328 		nids = 0;
1329 
1330 out:
1331 	if (suword32(pnids, (uint32_t)numids) ||
1332 	    (nids != 0 && copyout(ids, buf, numids * sizeof (int))))
1333 		error = EFAULT;
1334 	if (idsize != 0)
1335 		kmem_free(ids, idsize);
1336 	return (error);
1337 }
1338 
1339 /*
1340  * Destroy IPC objects from the given service that are associated with
1341  * the given zone.
1342  *
1343  * We can't hold on to the service lock when freeing objects, so we
1344  * first search the service and move all the objects to a private
1345  * list, then walk through and free them after dropping the lock.
1346  */
1347 void
1348 ipc_remove_zone(ipc_service_t *service, zoneid_t zoneid)
1349 {
1350 	kipc_perm_t *perm, *next;
1351 	list_t rmlist;
1352 	kmutex_t *lock;
1353 
1354 	list_create(&rmlist, sizeof (kipc_perm_t),
1355 	    offsetof(kipc_perm_t, ipc_list));
1356 
1357 	mutex_enter(&service->ipcs_lock);
1358 	for (perm = list_head(&service->ipcs_usedids); perm != NULL;
1359 	    perm = next) {
1360 		next = list_next(&service->ipcs_usedids, perm);
1361 		if (perm->ipc_zoneid != zoneid)
1362 			continue;
1363 
1364 		/*
1365 		 * Remove the object from the service, then put it on
1366 		 * the removal list so we can defer the call to
1367 		 * ipc_rele (which will actually free the structure).
1368 		 * We need to do this since the destructor may grab
1369 		 * the service lock.
1370 		 */
1371 		ASSERT(!IPC_FREE(perm));
1372 		lock = ipc_lock(service, perm->ipc_id);
1373 		ipc_remove(service, perm);
1374 		mutex_exit(lock);
1375 		list_insert_tail(&rmlist, perm);
1376 	}
1377 	mutex_exit(&service->ipcs_lock);
1378 
1379 	/*
1380 	 * Now that we've dropped the service lock, loop through the
1381 	 * private list freeing removed objects.
1382 	 */
1383 	for (perm = list_head(&rmlist); perm != NULL; perm = next) {
1384 		next = list_next(&rmlist, perm);
1385 		list_remove(&rmlist, perm);
1386 
1387 		(void) ipc_lock(service, perm->ipc_id);
1388 
1389 		/* perform any per-service removal actions */
1390 		service->ipcs_rmid(perm);
1391 
1392 		/* release reference */
1393 		ipc_rele(service, perm);
1394 	}
1395 
1396 	list_destroy(&rmlist);
1397 }
1398