xref: /titanic_54/usr/src/uts/common/os/shm.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28*7c478bd9Sstevel@tonic-gate /*	  All Rights Reserved	*/
29*7c478bd9Sstevel@tonic-gate 
30*7c478bd9Sstevel@tonic-gate /*
31*7c478bd9Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
32*7c478bd9Sstevel@tonic-gate  * The Regents of the University of California
33*7c478bd9Sstevel@tonic-gate  * All Rights Reserved
34*7c478bd9Sstevel@tonic-gate  *
35*7c478bd9Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
36*7c478bd9Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
37*7c478bd9Sstevel@tonic-gate  * contributors.
38*7c478bd9Sstevel@tonic-gate  */
39*7c478bd9Sstevel@tonic-gate 
40*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
41*7c478bd9Sstevel@tonic-gate 
42*7c478bd9Sstevel@tonic-gate /*
43*7c478bd9Sstevel@tonic-gate  * Inter-Process Communication Shared Memory Facility.
44*7c478bd9Sstevel@tonic-gate  *
45*7c478bd9Sstevel@tonic-gate  * See os/ipc.c for a description of common IPC functionality.
46*7c478bd9Sstevel@tonic-gate  *
47*7c478bd9Sstevel@tonic-gate  * Resource controls
48*7c478bd9Sstevel@tonic-gate  * -----------------
49*7c478bd9Sstevel@tonic-gate  *
50*7c478bd9Sstevel@tonic-gate  * Control:      project.max-shm-ids (rc_project_shmmni)
51*7c478bd9Sstevel@tonic-gate  * Description:  Maximum number of shared memory ids allowed a project.
52*7c478bd9Sstevel@tonic-gate  *
53*7c478bd9Sstevel@tonic-gate  *   When shmget() is used to allocate a shared memory segment, one id
54*7c478bd9Sstevel@tonic-gate  *   is allocated.  If the id allocation doesn't succeed, shmget()
55*7c478bd9Sstevel@tonic-gate  *   fails and errno is set to ENOSPC.  Upon successful shmctl(,
56*7c478bd9Sstevel@tonic-gate  *   IPC_RMID) the id is deallocated.
57*7c478bd9Sstevel@tonic-gate  *
58*7c478bd9Sstevel@tonic-gate  * Control:      project.max-shm-memory (rc_project_shmmax)
59*7c478bd9Sstevel@tonic-gate  * Description:  Total amount of shared memory allowed a project.
60*7c478bd9Sstevel@tonic-gate  *
61*7c478bd9Sstevel@tonic-gate  *   When shmget() is used to allocate a shared memory segment, the
62*7c478bd9Sstevel@tonic-gate  *   segment's size is allocated against this limit.  If the space
63*7c478bd9Sstevel@tonic-gate  *   allocation doesn't succeed, shmget() fails and errno is set to
64*7c478bd9Sstevel@tonic-gate  *   EINVAL.  The size will be deallocated once the last process has
65*7c478bd9Sstevel@tonic-gate  *   detached the segment and the segment has been successfully
66*7c478bd9Sstevel@tonic-gate  *   shmctl(, IPC_RMID)ed.
67*7c478bd9Sstevel@tonic-gate  */
68*7c478bd9Sstevel@tonic-gate 
69*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
70*7c478bd9Sstevel@tonic-gate #include <sys/param.h>
71*7c478bd9Sstevel@tonic-gate #include <sys/cred.h>
72*7c478bd9Sstevel@tonic-gate #include <sys/errno.h>
73*7c478bd9Sstevel@tonic-gate #include <sys/time.h>
74*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
75*7c478bd9Sstevel@tonic-gate #include <sys/user.h>
76*7c478bd9Sstevel@tonic-gate #include <sys/proc.h>
77*7c478bd9Sstevel@tonic-gate #include <sys/systm.h>
78*7c478bd9Sstevel@tonic-gate #include <sys/prsystm.h>
79*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
80*7c478bd9Sstevel@tonic-gate #include <sys/tuneable.h>
81*7c478bd9Sstevel@tonic-gate #include <sys/vm.h>
82*7c478bd9Sstevel@tonic-gate #include <sys/mman.h>
83*7c478bd9Sstevel@tonic-gate #include <sys/swap.h>
84*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
85*7c478bd9Sstevel@tonic-gate #include <sys/debug.h>
86*7c478bd9Sstevel@tonic-gate #include <sys/lwpchan_impl.h>
87*7c478bd9Sstevel@tonic-gate #include <sys/avl.h>
88*7c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
89*7c478bd9Sstevel@tonic-gate #include <sys/syscall.h>
90*7c478bd9Sstevel@tonic-gate #include <sys/task.h>
91*7c478bd9Sstevel@tonic-gate #include <sys/project.h>
92*7c478bd9Sstevel@tonic-gate #include <sys/policy.h>
93*7c478bd9Sstevel@tonic-gate #include <sys/zone.h>
94*7c478bd9Sstevel@tonic-gate 
95*7c478bd9Sstevel@tonic-gate #include <sys/ipc.h>
96*7c478bd9Sstevel@tonic-gate #include <sys/ipc_impl.h>
97*7c478bd9Sstevel@tonic-gate #include <sys/shm.h>
98*7c478bd9Sstevel@tonic-gate #include <sys/shm_impl.h>
99*7c478bd9Sstevel@tonic-gate 
100*7c478bd9Sstevel@tonic-gate #include <vm/hat.h>
101*7c478bd9Sstevel@tonic-gate #include <vm/seg.h>
102*7c478bd9Sstevel@tonic-gate #include <vm/as.h>
103*7c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h>
104*7c478bd9Sstevel@tonic-gate #include <vm/anon.h>
105*7c478bd9Sstevel@tonic-gate #include <vm/page.h>
106*7c478bd9Sstevel@tonic-gate #include <vm/vpage.h>
107*7c478bd9Sstevel@tonic-gate #include <vm/seg_spt.h>
108*7c478bd9Sstevel@tonic-gate 
109*7c478bd9Sstevel@tonic-gate #include <c2/audit.h>
110*7c478bd9Sstevel@tonic-gate 
111*7c478bd9Sstevel@tonic-gate static int shmem_lock(struct anon_map *amp);
112*7c478bd9Sstevel@tonic-gate static void shmem_unlock(struct anon_map *amp, uint_t lck);
113*7c478bd9Sstevel@tonic-gate static void sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags,
114*7c478bd9Sstevel@tonic-gate 	kshmid_t *id);
115*7c478bd9Sstevel@tonic-gate static void shm_rm_amp(struct anon_map *amp, uint_t lckflag);
116*7c478bd9Sstevel@tonic-gate static void shm_dtor(kipc_perm_t *);
117*7c478bd9Sstevel@tonic-gate static void shm_rmid(kipc_perm_t *);
118*7c478bd9Sstevel@tonic-gate static void shm_remove_zone(zoneid_t, void *);
119*7c478bd9Sstevel@tonic-gate 
120*7c478bd9Sstevel@tonic-gate /*
121*7c478bd9Sstevel@tonic-gate  * Semantics for share_page_table and ism_off:
122*7c478bd9Sstevel@tonic-gate  *
123*7c478bd9Sstevel@tonic-gate  * These are hooks in /etc/system - only for internal testing purposes.
124*7c478bd9Sstevel@tonic-gate  *
125*7c478bd9Sstevel@tonic-gate  * Setting share_page_table automatically turns on the SHM_SHARE_MMU (ISM) flag
126*7c478bd9Sstevel@tonic-gate  * in a call to shmat(2). In other words, with share_page_table set, you always
127*7c478bd9Sstevel@tonic-gate  * get ISM, even if say, DISM is specified. It should really be called "ism_on".
128*7c478bd9Sstevel@tonic-gate  *
129*7c478bd9Sstevel@tonic-gate  * Setting ism_off turns off the SHM_SHARE_MMU flag from the flags passed to
130*7c478bd9Sstevel@tonic-gate  * shmat(2).
131*7c478bd9Sstevel@tonic-gate  *
132*7c478bd9Sstevel@tonic-gate  * If both share_page_table and ism_off are set, share_page_table prevails.
133*7c478bd9Sstevel@tonic-gate  *
134*7c478bd9Sstevel@tonic-gate  * Although these tunables should probably be removed, they do have some
135*7c478bd9Sstevel@tonic-gate  * external exposure; as long as they exist, they should at least work sensibly.
136*7c478bd9Sstevel@tonic-gate  */
137*7c478bd9Sstevel@tonic-gate 
/* When non-zero, shmat() clears SHM_PAGEABLE and sets SHM_SHARE_MMU. */
138*7c478bd9Sstevel@tonic-gate int share_page_table;
/* When non-zero, shmat() clears SHM_SHARE_MMU from the caller's flags. */
139*7c478bd9Sstevel@tonic-gate int ism_off;
140*7c478bd9Sstevel@tonic-gate 
141*7c478bd9Sstevel@tonic-gate /*
142*7c478bd9Sstevel@tonic-gate  * The following tunables are obsolete.  Though for compatibility we
143*7c478bd9Sstevel@tonic-gate  * still read and interpret shminfo_shmmax and shminfo_shmmni (see
144*7c478bd9Sstevel@tonic-gate  * os/project.c), the preferred mechanism for administrating the IPC
145*7c478bd9Sstevel@tonic-gate  * Shared Memory facility is through the resource controls described at
146*7c478bd9Sstevel@tonic-gate  * the top of this file.
147*7c478bd9Sstevel@tonic-gate  */
148*7c478bd9Sstevel@tonic-gate size_t	shminfo_shmmax = 0x800000;	/* (obsolete) */
149*7c478bd9Sstevel@tonic-gate int	shminfo_shmmni = 100;		/* (obsolete) */
150*7c478bd9Sstevel@tonic-gate size_t	shminfo_shmmin = 1;		/* (obsolete) */
151*7c478bd9Sstevel@tonic-gate int	shminfo_shmseg = 6;		/* (obsolete) */
152*7c478bd9Sstevel@tonic-gate 
/*
 * Resource control handles for project.max-shm-memory and
 * project.max-shm-ids (see the block comment at the top of this file);
 * they are defined outside this file.
 */
153*7c478bd9Sstevel@tonic-gate extern rctl_hndl_t rc_project_shmmax;
154*7c478bd9Sstevel@tonic-gate extern rctl_hndl_t rc_project_shmmni;
/* IPC service handle for shared memory ids; set by ipcs_create() in _init(). */
155*7c478bd9Sstevel@tonic-gate static ipc_service_t *shm_svc;
/* Zone key registered in _init() with shm_remove_zone as its callback. */
156*7c478bd9Sstevel@tonic-gate static zone_key_t shm_zone_key;
157*7c478bd9Sstevel@tonic-gate 
158*7c478bd9Sstevel@tonic-gate /*
159*7c478bd9Sstevel@tonic-gate  * Module linkage information for the kernel.
160*7c478bd9Sstevel@tonic-gate  */
/* Common entry point for the shared memory system calls. */
161*7c478bd9Sstevel@tonic-gate static uintptr_t shmsys(int, uintptr_t, uintptr_t, uintptr_t);
162*7c478bd9Sstevel@tonic-gate 
/*
 * Native system call entry for shmsys().  The leading 4 matches the number
 * of parameters shmsys() takes (presumably the argument count -- confirm
 * against struct sysent).  The return-value flag is SE_64RVAL when
 * _SYSCALL32_IMPL is defined and SE_32RVAL1 otherwise.
 */
163*7c478bd9Sstevel@tonic-gate static struct sysent ipcshm_sysent = {
164*7c478bd9Sstevel@tonic-gate 	4,
165*7c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
166*7c478bd9Sstevel@tonic-gate 	SE_ARGC | SE_NOUNLOAD | SE_64RVAL,
167*7c478bd9Sstevel@tonic-gate #else	/* _SYSCALL32_IMPL */
168*7c478bd9Sstevel@tonic-gate 	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
169*7c478bd9Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
170*7c478bd9Sstevel@tonic-gate 	(int (*)())shmsys
171*7c478bd9Sstevel@tonic-gate };
172*7c478bd9Sstevel@tonic-gate 
173*7c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
/* Second entry for the same shmsys() handler, always using SE_32RVAL1. */
174*7c478bd9Sstevel@tonic-gate static struct sysent ipcshm_sysent32 = {
175*7c478bd9Sstevel@tonic-gate 	4,
176*7c478bd9Sstevel@tonic-gate 	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
177*7c478bd9Sstevel@tonic-gate 	(int (*)())shmsys
178*7c478bd9Sstevel@tonic-gate };
179*7c478bd9Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
180*7c478bd9Sstevel@tonic-gate 
/* Module description binding ipcshm_sysent to mod_syscallops. */
181*7c478bd9Sstevel@tonic-gate static struct modlsys modlsys = {
182*7c478bd9Sstevel@tonic-gate 	&mod_syscallops, "System V shared memory", &ipcshm_sysent
183*7c478bd9Sstevel@tonic-gate };
184*7c478bd9Sstevel@tonic-gate 
185*7c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
/* Module description binding ipcshm_sysent32 to mod_syscallops32. */
186*7c478bd9Sstevel@tonic-gate static struct modlsys modlsys32 = {
187*7c478bd9Sstevel@tonic-gate 	&mod_syscallops32, "32-bit System V shared memory", &ipcshm_sysent32
188*7c478bd9Sstevel@tonic-gate };
189*7c478bd9Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
190*7c478bd9Sstevel@tonic-gate 
/*
 * NULL-terminated linkage list handed to mod_install() and mod_info();
 * modlsys32 is included only when _SYSCALL32_IMPL is defined.
 */
191*7c478bd9Sstevel@tonic-gate static struct modlinkage modlinkage = {
192*7c478bd9Sstevel@tonic-gate 	MODREV_1,
193*7c478bd9Sstevel@tonic-gate 	&modlsys,
194*7c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
195*7c478bd9Sstevel@tonic-gate 	&modlsys32,
196*7c478bd9Sstevel@tonic-gate #endif
197*7c478bd9Sstevel@tonic-gate 	NULL
198*7c478bd9Sstevel@tonic-gate };
199*7c478bd9Sstevel@tonic-gate 
200*7c478bd9Sstevel@tonic-gate 
201*7c478bd9Sstevel@tonic-gate int
202*7c478bd9Sstevel@tonic-gate _init(void)
203*7c478bd9Sstevel@tonic-gate {
204*7c478bd9Sstevel@tonic-gate 	int result;
205*7c478bd9Sstevel@tonic-gate 
206*7c478bd9Sstevel@tonic-gate 	shm_svc = ipcs_create("shmids", rc_project_shmmni, sizeof (kshmid_t),
207*7c478bd9Sstevel@tonic-gate 	    shm_dtor, shm_rmid, AT_IPC_SHM,
208*7c478bd9Sstevel@tonic-gate 	    offsetof(kproject_data_t, kpd_shmmni));
209*7c478bd9Sstevel@tonic-gate 	zone_key_create(&shm_zone_key, NULL, shm_remove_zone, NULL);
210*7c478bd9Sstevel@tonic-gate 
211*7c478bd9Sstevel@tonic-gate 	if ((result = mod_install(&modlinkage)) == 0)
212*7c478bd9Sstevel@tonic-gate 		return (0);
213*7c478bd9Sstevel@tonic-gate 
214*7c478bd9Sstevel@tonic-gate 	(void) zone_key_delete(shm_zone_key);
215*7c478bd9Sstevel@tonic-gate 	ipcs_destroy(shm_svc);
216*7c478bd9Sstevel@tonic-gate 
217*7c478bd9Sstevel@tonic-gate 	return (result);
218*7c478bd9Sstevel@tonic-gate }
219*7c478bd9Sstevel@tonic-gate 
/*
 * Module unload entry point.  Unloading is always refused with EBUSY.
 */
int
_fini(void)
{
	const int refused = EBUSY;

	return (refused);
}
225*7c478bd9Sstevel@tonic-gate 
226*7c478bd9Sstevel@tonic-gate int
227*7c478bd9Sstevel@tonic-gate _info(struct modinfo *modinfop)
228*7c478bd9Sstevel@tonic-gate {
229*7c478bd9Sstevel@tonic-gate 	return (mod_info(&modlinkage, modinfop));
230*7c478bd9Sstevel@tonic-gate }
231*7c478bd9Sstevel@tonic-gate 
232*7c478bd9Sstevel@tonic-gate /*
233*7c478bd9Sstevel@tonic-gate  * Shmat (attach shared segment) system call.
 *
 * Maps the segment identified by shmid into the address space of the
 * calling process (curproc->p_as).  If uaddr is 0 an attach address is
 * chosen with map_addr(); otherwise uaddr is validated and used.  uflags
 * is masked with SHMAT_VALID_FLAGS_MASK before use.
 *
 * On success 0 is returned and the attach address is stored through rvp.
 * On failure an errno value is returned (EINVAL, EACCES or ENOMEM from
 * the checks below, or whatever ipcperm_access(), sptcreate() or as_map()
 * reported) and *rvp is not written.
 *
 * The mutex returned by ipc_lookup() is held until the errret label,
 * which every path after a successful lookup goes through.
234*7c478bd9Sstevel@tonic-gate  */
235*7c478bd9Sstevel@tonic-gate static int
236*7c478bd9Sstevel@tonic-gate shmat(int shmid, caddr_t uaddr, int uflags, uintptr_t *rvp)
237*7c478bd9Sstevel@tonic-gate {
238*7c478bd9Sstevel@tonic-gate 	kshmid_t *sp;	/* shared memory header ptr */
239*7c478bd9Sstevel@tonic-gate 	size_t	size;
240*7c478bd9Sstevel@tonic-gate 	int	error = 0;
241*7c478bd9Sstevel@tonic-gate 	proc_t *pp = curproc;
242*7c478bd9Sstevel@tonic-gate 	struct as *as = pp->p_as;
243*7c478bd9Sstevel@tonic-gate 	struct segvn_crargs	crargs;	/* segvn create arguments */
244*7c478bd9Sstevel@tonic-gate 	kmutex_t	*lock;
245*7c478bd9Sstevel@tonic-gate 	struct seg 	*segspt = NULL;
246*7c478bd9Sstevel@tonic-gate 	caddr_t		addr = uaddr;
247*7c478bd9Sstevel@tonic-gate 	int		flags = (uflags & SHMAT_VALID_FLAGS_MASK);
248*7c478bd9Sstevel@tonic-gate 	int		useISM;
249*7c478bd9Sstevel@tonic-gate 	uchar_t		prot = PROT_ALL;
250*7c478bd9Sstevel@tonic-gate 	int result;
251*7c478bd9Sstevel@tonic-gate 
	/*
	 * A NULL return means the id could not be looked up.  Otherwise the
	 * returned mutex is the one dropped at errret.
	 */
252*7c478bd9Sstevel@tonic-gate 	if ((lock = ipc_lookup(shm_svc, shmid, (kipc_perm_t **)&sp)) == NULL)
253*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
	/*
	 * Read permission is always required; write permission is required
	 * as well unless the caller asked for SHM_RDONLY.
	 */
254*7c478bd9Sstevel@tonic-gate 	if (error = ipcperm_access(&sp->shm_perm, SHM_R, CRED()))
255*7c478bd9Sstevel@tonic-gate 		goto errret;
256*7c478bd9Sstevel@tonic-gate 	if ((flags & SHM_RDONLY) == 0 &&
257*7c478bd9Sstevel@tonic-gate 	    (error = ipcperm_access(&sp->shm_perm, SHM_W, CRED())))
258*7c478bd9Sstevel@tonic-gate 		goto errret;
259*7c478bd9Sstevel@tonic-gate 	if (spt_invalid(flags)) {
260*7c478bd9Sstevel@tonic-gate 		error = EINVAL;
261*7c478bd9Sstevel@tonic-gate 		goto errret;
262*7c478bd9Sstevel@tonic-gate 	}
	/*
	 * Apply the ism_off and share_page_table tunables.  share_page_table
	 * is applied second, so it prevails when both are set.
	 */
263*7c478bd9Sstevel@tonic-gate 	if (ism_off)
264*7c478bd9Sstevel@tonic-gate 		flags = flags & ~SHM_SHARE_MMU;
265*7c478bd9Sstevel@tonic-gate 	if (share_page_table) {
266*7c478bd9Sstevel@tonic-gate 		flags = flags & ~SHM_PAGEABLE;
267*7c478bd9Sstevel@tonic-gate 		flags = flags | SHM_SHARE_MMU;
268*7c478bd9Sstevel@tonic-gate 	}
	/*
	 * The ISM path is taken when spt_locked() or spt_pageable() accepts
	 * the adjusted flags; it requires write permission even if
	 * SHM_RDONLY was passed.
	 */
269*7c478bd9Sstevel@tonic-gate 	useISM = (spt_locked(flags) || spt_pageable(flags));
270*7c478bd9Sstevel@tonic-gate 	if (useISM && (error = ipcperm_access(&sp->shm_perm, SHM_W, CRED())))
271*7c478bd9Sstevel@tonic-gate 		goto errret;
272*7c478bd9Sstevel@tonic-gate 	if (useISM && isspt(sp)) {
273*7c478bd9Sstevel@tonic-gate 		uint_t newsptflags = flags | spt_flags(sp->shm_sptseg);
274*7c478bd9Sstevel@tonic-gate 		/*
275*7c478bd9Sstevel@tonic-gate 		 * If trying to change an existing {D}ISM segment from ISM
276*7c478bd9Sstevel@tonic-gate 		 * to DISM or vice versa, return error. Note that this
277*7c478bd9Sstevel@tonic-gate 		 * validation of flags needs to be done after the effect of
278*7c478bd9Sstevel@tonic-gate 		 * tunables such as ism_off and share_page_table, for
279*7c478bd9Sstevel@tonic-gate 		 * semantics that are consistent with the tunables' settings.
280*7c478bd9Sstevel@tonic-gate 		 */
281*7c478bd9Sstevel@tonic-gate 		if (spt_invalid(newsptflags)) {
282*7c478bd9Sstevel@tonic-gate 			error = EINVAL;
283*7c478bd9Sstevel@tonic-gate 			goto errret;
284*7c478bd9Sstevel@tonic-gate 		}
285*7c478bd9Sstevel@tonic-gate 	}
	/* Read the anon map's size while holding its rwlock. */
286*7c478bd9Sstevel@tonic-gate 	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
287*7c478bd9Sstevel@tonic-gate 	size = sp->shm_amp->size;
288*7c478bd9Sstevel@tonic-gate 	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
289*7c478bd9Sstevel@tonic-gate 
290*7c478bd9Sstevel@tonic-gate 	/* somewhere to record spt info for final detach */
291*7c478bd9Sstevel@tonic-gate 	if (sp->shm_sptinfo == NULL)
292*7c478bd9Sstevel@tonic-gate 		sp->shm_sptinfo = kmem_zalloc(sizeof (sptinfo_t), KM_SLEEP);
293*7c478bd9Sstevel@tonic-gate 
	/*
	 * From here until the as_rangeunlock() after the mapping attempt,
	 * every error path must call as_rangeunlock(as) itself before
	 * jumping to errret.
	 */
294*7c478bd9Sstevel@tonic-gate 	as_rangelock(as);
295*7c478bd9Sstevel@tonic-gate 
296*7c478bd9Sstevel@tonic-gate 	if (useISM) {
297*7c478bd9Sstevel@tonic-gate 		/*
298*7c478bd9Sstevel@tonic-gate 		 * Handle ISM
299*7c478bd9Sstevel@tonic-gate 		 */
300*7c478bd9Sstevel@tonic-gate 		uint_t	n, share_szc;
301*7c478bd9Sstevel@tonic-gate 		size_t	share_size;
302*7c478bd9Sstevel@tonic-gate 		struct	shm_data ssd;
303*7c478bd9Sstevel@tonic-gate 		uintptr_t align_hint;
304*7c478bd9Sstevel@tonic-gate 
305*7c478bd9Sstevel@tonic-gate 		n = page_num_pagesizes();
306*7c478bd9Sstevel@tonic-gate 		if (n < 2) { /* large pages aren't supported */
307*7c478bd9Sstevel@tonic-gate 			as_rangeunlock(as);
308*7c478bd9Sstevel@tonic-gate 			error = EINVAL;
309*7c478bd9Sstevel@tonic-gate 			goto errret;
310*7c478bd9Sstevel@tonic-gate 		}
311*7c478bd9Sstevel@tonic-gate 
312*7c478bd9Sstevel@tonic-gate 		/*
313*7c478bd9Sstevel@tonic-gate 		 * Pick a share pagesize to use, if (!isspt(sp)).
314*7c478bd9Sstevel@tonic-gate 		 * Otherwise use the already chosen page size.
315*7c478bd9Sstevel@tonic-gate 		 *
316*7c478bd9Sstevel@tonic-gate 		 * For the initial shmat (!isspt(sp)), where sptcreate is
317*7c478bd9Sstevel@tonic-gate 		 * called, map_pgsz is called to recommend a [D]ISM pagesize,
318*7c478bd9Sstevel@tonic-gate 		 * important for systems which offer more than one potential
319*7c478bd9Sstevel@tonic-gate 		 * [D]ISM pagesize.
320*7c478bd9Sstevel@tonic-gate 		 * If the shmat is just to attach to an already created
321*7c478bd9Sstevel@tonic-gate 		 * [D]ISM segment, then use the previously selected page size.
322*7c478bd9Sstevel@tonic-gate 		 */
323*7c478bd9Sstevel@tonic-gate 		if (!isspt(sp)) {
324*7c478bd9Sstevel@tonic-gate 			share_size = map_pgsz(MAPPGSZ_ISM,
325*7c478bd9Sstevel@tonic-gate 			    pp, addr, size, NULL);
326*7c478bd9Sstevel@tonic-gate 			if (share_size == 0) {
327*7c478bd9Sstevel@tonic-gate 				as_rangeunlock(as);
328*7c478bd9Sstevel@tonic-gate 				error = EINVAL;
329*7c478bd9Sstevel@tonic-gate 				goto errret;
330*7c478bd9Sstevel@tonic-gate 			}
331*7c478bd9Sstevel@tonic-gate 			share_szc = page_szc(share_size);
332*7c478bd9Sstevel@tonic-gate 		} else {
333*7c478bd9Sstevel@tonic-gate 			share_szc = sp->shm_sptseg->s_szc;
334*7c478bd9Sstevel@tonic-gate 			share_size = page_get_pagesize(share_szc);
335*7c478bd9Sstevel@tonic-gate 		}
		/* The mapping length is rounded up to the share page size. */
336*7c478bd9Sstevel@tonic-gate 		size = P2ROUNDUP(size, share_size);
337*7c478bd9Sstevel@tonic-gate 
338*7c478bd9Sstevel@tonic-gate 		align_hint = share_size;
339*7c478bd9Sstevel@tonic-gate #if defined(__i386) || defined(__amd64)
340*7c478bd9Sstevel@tonic-gate 		/*
341*7c478bd9Sstevel@tonic-gate 		 * For 64 bit amd64, we want to share an entire page table
342*7c478bd9Sstevel@tonic-gate 		 * if possible. We know (ugh) that there are 512 entries
343*7c478bd9Sstevel@tonic-gate 		 * in a page table. The number for 32 bit non-PAE should be
344*7c478bd9Sstevel@tonic-gate 		 * 1024, but I'm not going to special case that. Note using 512
345*7c478bd9Sstevel@tonic-gate 		 * won't cause a failure below. It retries with align_hint set
346*7c478bd9Sstevel@tonic-gate 		 * to share_size
347*7c478bd9Sstevel@tonic-gate 		 */
348*7c478bd9Sstevel@tonic-gate 		while (size >= 512 * (uint64_t)align_hint)
349*7c478bd9Sstevel@tonic-gate 			align_hint *= 512;
350*7c478bd9Sstevel@tonic-gate #endif /* __i386 || __amd64 */
351*7c478bd9Sstevel@tonic-gate 
352*7c478bd9Sstevel@tonic-gate #if defined(__sparcv9)
353*7c478bd9Sstevel@tonic-gate 		if (addr == 0 && curproc->p_model == DATAMODEL_LP64) {
354*7c478bd9Sstevel@tonic-gate 			/*
355*7c478bd9Sstevel@tonic-gate 			 * If no address has been passed in, and this is a
356*7c478bd9Sstevel@tonic-gate 			 * 64-bit process, we'll try to find an address
357*7c478bd9Sstevel@tonic-gate 			 * in the predict-ISM zone.
358*7c478bd9Sstevel@tonic-gate 			 */
359*7c478bd9Sstevel@tonic-gate 			caddr_t predbase = (caddr_t)PREDISM_1T_BASE;
360*7c478bd9Sstevel@tonic-gate 			size_t len = PREDISM_BOUND - PREDISM_1T_BASE;
361*7c478bd9Sstevel@tonic-gate 
362*7c478bd9Sstevel@tonic-gate 			as_purge(as);
363*7c478bd9Sstevel@tonic-gate 			if (as_gap(as, size + share_size, &predbase, &len,
364*7c478bd9Sstevel@tonic-gate 			    AH_LO, (caddr_t)NULL) != -1) {
365*7c478bd9Sstevel@tonic-gate 				/*
366*7c478bd9Sstevel@tonic-gate 				 * We found an address which looks like a
367*7c478bd9Sstevel@tonic-gate 				 * candidate.  We want to round it up, and
368*7c478bd9Sstevel@tonic-gate 				 * then check that it's a valid user range.
369*7c478bd9Sstevel@tonic-gate 				 * This assures that we won't fail below.
370*7c478bd9Sstevel@tonic-gate 				 */
371*7c478bd9Sstevel@tonic-gate 				addr = (caddr_t)P2ROUNDUP((uintptr_t)predbase,
372*7c478bd9Sstevel@tonic-gate 				    share_size);
373*7c478bd9Sstevel@tonic-gate 
374*7c478bd9Sstevel@tonic-gate 				if (valid_usr_range(addr, size, prot,
375*7c478bd9Sstevel@tonic-gate 				    as, as->a_userlimit) != RANGE_OKAY) {
376*7c478bd9Sstevel@tonic-gate 					addr = 0;
377*7c478bd9Sstevel@tonic-gate 				}
378*7c478bd9Sstevel@tonic-gate 			}
379*7c478bd9Sstevel@tonic-gate 		}
380*7c478bd9Sstevel@tonic-gate #endif /* __sparcv9 */
381*7c478bd9Sstevel@tonic-gate 
382*7c478bd9Sstevel@tonic-gate 		if (addr == 0) {
			/*
			 * Let map_addr() choose the address.  The first
			 * attempt uses align_hint; if that yields no address
			 * and align_hint was larger than share_size, retry
			 * once with share_size alignment.
			 */
383*7c478bd9Sstevel@tonic-gate 			for (;;) {
384*7c478bd9Sstevel@tonic-gate 				addr = (caddr_t)align_hint;
385*7c478bd9Sstevel@tonic-gate 				map_addr(&addr, size, 0ll, 1, MAP_ALIGN);
386*7c478bd9Sstevel@tonic-gate 				if (addr != NULL || align_hint == share_size)
387*7c478bd9Sstevel@tonic-gate 					break;
388*7c478bd9Sstevel@tonic-gate 				align_hint = share_size;
389*7c478bd9Sstevel@tonic-gate 			}
390*7c478bd9Sstevel@tonic-gate 			if (addr == NULL) {
391*7c478bd9Sstevel@tonic-gate 				as_rangeunlock(as);
392*7c478bd9Sstevel@tonic-gate 				error = ENOMEM;
393*7c478bd9Sstevel@tonic-gate 				goto errret;
394*7c478bd9Sstevel@tonic-gate 			}
395*7c478bd9Sstevel@tonic-gate 			ASSERT(((uintptr_t)addr & (align_hint - 1)) == 0);
396*7c478bd9Sstevel@tonic-gate 		} else {
397*7c478bd9Sstevel@tonic-gate 			/* Use the user-supplied attach address */
398*7c478bd9Sstevel@tonic-gate 			caddr_t base;
399*7c478bd9Sstevel@tonic-gate 			size_t len;
400*7c478bd9Sstevel@tonic-gate 
401*7c478bd9Sstevel@tonic-gate 			/*
402*7c478bd9Sstevel@tonic-gate 			 * Check that the address range
403*7c478bd9Sstevel@tonic-gate 			 *  1) is properly aligned
404*7c478bd9Sstevel@tonic-gate 			 *  2) is correct in unix terms
405*7c478bd9Sstevel@tonic-gate 			 *  3) is within an unmapped address segment
406*7c478bd9Sstevel@tonic-gate 			 */
407*7c478bd9Sstevel@tonic-gate 			base = addr;
408*7c478bd9Sstevel@tonic-gate 			len = size;		/* use spt aligned size */
409*7c478bd9Sstevel@tonic-gate 			/* XXX - in SunOS, is sp->shm_segsz */
410*7c478bd9Sstevel@tonic-gate 			if ((uintptr_t)base & (share_size - 1)) {
411*7c478bd9Sstevel@tonic-gate 				error = EINVAL;
412*7c478bd9Sstevel@tonic-gate 				as_rangeunlock(as);
413*7c478bd9Sstevel@tonic-gate 				goto errret;
414*7c478bd9Sstevel@tonic-gate 			}
415*7c478bd9Sstevel@tonic-gate 			result = valid_usr_range(base, len, prot, as,
416*7c478bd9Sstevel@tonic-gate 			    as->a_userlimit);
417*7c478bd9Sstevel@tonic-gate 			if (result == RANGE_BADPROT) {
418*7c478bd9Sstevel@tonic-gate 				/*
419*7c478bd9Sstevel@tonic-gate 				 * We try to accommodate processors which
420*7c478bd9Sstevel@tonic-gate 				 * may not support execute permissions on
421*7c478bd9Sstevel@tonic-gate 				 * all ISM segments by trying the check
422*7c478bd9Sstevel@tonic-gate 				 * again but without PROT_EXEC.
423*7c478bd9Sstevel@tonic-gate 				 */
424*7c478bd9Sstevel@tonic-gate 				prot &= ~PROT_EXEC;
425*7c478bd9Sstevel@tonic-gate 				result = valid_usr_range(base, len, prot, as,
426*7c478bd9Sstevel@tonic-gate 				    as->a_userlimit);
427*7c478bd9Sstevel@tonic-gate 			}
428*7c478bd9Sstevel@tonic-gate 			as_purge(as);
429*7c478bd9Sstevel@tonic-gate 			if (result != RANGE_OKAY ||
430*7c478bd9Sstevel@tonic-gate 			    as_gap(as, len, &base, &len, AH_LO,
431*7c478bd9Sstevel@tonic-gate 			    (caddr_t)NULL) != 0) {
432*7c478bd9Sstevel@tonic-gate 				error = EINVAL;
433*7c478bd9Sstevel@tonic-gate 				as_rangeunlock(as);
434*7c478bd9Sstevel@tonic-gate 				goto errret;
435*7c478bd9Sstevel@tonic-gate 			}
436*7c478bd9Sstevel@tonic-gate 		}
437*7c478bd9Sstevel@tonic-gate 
438*7c478bd9Sstevel@tonic-gate 		if (!isspt(sp)) {
			/*
			 * No spt segment exists for this id yet: create one
			 * and record it, its address space and its
			 * protections in the header.
			 */
439*7c478bd9Sstevel@tonic-gate 			error = sptcreate(size, &segspt, sp->shm_amp, prot,
440*7c478bd9Sstevel@tonic-gate 			    flags, share_szc);
441*7c478bd9Sstevel@tonic-gate 			if (error) {
442*7c478bd9Sstevel@tonic-gate 				as_rangeunlock(as);
443*7c478bd9Sstevel@tonic-gate 				goto errret;
444*7c478bd9Sstevel@tonic-gate 			}
445*7c478bd9Sstevel@tonic-gate 			sp->shm_sptinfo->sptas = segspt->s_as;
446*7c478bd9Sstevel@tonic-gate 			sp->shm_sptseg = segspt;
447*7c478bd9Sstevel@tonic-gate 			sp->shm_sptprot = prot;
448*7c478bd9Sstevel@tonic-gate 			sp->shm_lkcnt = 0;
449*7c478bd9Sstevel@tonic-gate 		} else if ((prot & sp->shm_sptprot) != sp->shm_sptprot) {
450*7c478bd9Sstevel@tonic-gate 			/*
451*7c478bd9Sstevel@tonic-gate 			 * Ensure we're attaching to an ISM segment with
452*7c478bd9Sstevel@tonic-gate 			 * fewer or equal permissions than what we're
453*7c478bd9Sstevel@tonic-gate 			 * allowed.  Fail if the segment has more
454*7c478bd9Sstevel@tonic-gate 			 * permissions than what we're allowed.
455*7c478bd9Sstevel@tonic-gate 			 */
456*7c478bd9Sstevel@tonic-gate 			error = EACCES;
457*7c478bd9Sstevel@tonic-gate 			as_rangeunlock(as);
458*7c478bd9Sstevel@tonic-gate 			goto errret;
459*7c478bd9Sstevel@tonic-gate 		}
460*7c478bd9Sstevel@tonic-gate 
		/* Map the range using segspt_shmattach with the spt details. */
461*7c478bd9Sstevel@tonic-gate 		ssd.shm_sptseg = sp->shm_sptseg;
462*7c478bd9Sstevel@tonic-gate 		ssd.shm_sptas = sp->shm_sptinfo->sptas;
463*7c478bd9Sstevel@tonic-gate 		ssd.shm_amp = sp->shm_amp;
464*7c478bd9Sstevel@tonic-gate 		error = as_map(as, addr, size, segspt_shmattach, &ssd);
465*7c478bd9Sstevel@tonic-gate 		if (error == 0)
466*7c478bd9Sstevel@tonic-gate 			sp->shm_ismattch++; /* keep count of ISM attaches */
467*7c478bd9Sstevel@tonic-gate 	} else {
468*7c478bd9Sstevel@tonic-gate 
469*7c478bd9Sstevel@tonic-gate 		/*
470*7c478bd9Sstevel@tonic-gate 		 * Normal case.
471*7c478bd9Sstevel@tonic-gate 		 */
472*7c478bd9Sstevel@tonic-gate 		if (flags & SHM_RDONLY)
473*7c478bd9Sstevel@tonic-gate 			prot &= ~PROT_WRITE;
474*7c478bd9Sstevel@tonic-gate 
475*7c478bd9Sstevel@tonic-gate 		if (addr == 0) {
476*7c478bd9Sstevel@tonic-gate 			/* Let the system pick the attach address */
477*7c478bd9Sstevel@tonic-gate 			map_addr(&addr, size, 0ll, 1, 0);
478*7c478bd9Sstevel@tonic-gate 			if (addr == NULL) {
479*7c478bd9Sstevel@tonic-gate 				as_rangeunlock(as);
480*7c478bd9Sstevel@tonic-gate 				error = ENOMEM;
481*7c478bd9Sstevel@tonic-gate 				goto errret;
482*7c478bd9Sstevel@tonic-gate 			}
483*7c478bd9Sstevel@tonic-gate 		} else {
484*7c478bd9Sstevel@tonic-gate 			/* Use the user-supplied attach address */
485*7c478bd9Sstevel@tonic-gate 			caddr_t base;
486*7c478bd9Sstevel@tonic-gate 			size_t len;
487*7c478bd9Sstevel@tonic-gate 
			/* SHM_RND rounds the address down to a SHMLBA multiple. */
488*7c478bd9Sstevel@tonic-gate 			if (flags & SHM_RND)
489*7c478bd9Sstevel@tonic-gate 				addr = (caddr_t)((uintptr_t)addr &
490*7c478bd9Sstevel@tonic-gate 				    ~(SHMLBA - 1));
491*7c478bd9Sstevel@tonic-gate 			/*
492*7c478bd9Sstevel@tonic-gate 			 * Check that the address range
493*7c478bd9Sstevel@tonic-gate 			 *  1) is properly aligned
494*7c478bd9Sstevel@tonic-gate 			 *  2) is correct in unix terms
495*7c478bd9Sstevel@tonic-gate 			 *  3) is within an unmapped address segment
496*7c478bd9Sstevel@tonic-gate 			 */
497*7c478bd9Sstevel@tonic-gate 			base = addr;
498*7c478bd9Sstevel@tonic-gate 			len = size;		/* use aligned size */
499*7c478bd9Sstevel@tonic-gate 			/* XXX - in SunOS, is sp->shm_segsz */
500*7c478bd9Sstevel@tonic-gate 			if ((uintptr_t)base & PAGEOFFSET) {
501*7c478bd9Sstevel@tonic-gate 				error = EINVAL;
502*7c478bd9Sstevel@tonic-gate 				as_rangeunlock(as);
503*7c478bd9Sstevel@tonic-gate 				goto errret;
504*7c478bd9Sstevel@tonic-gate 			}
505*7c478bd9Sstevel@tonic-gate 			result = valid_usr_range(base, len, prot, as,
506*7c478bd9Sstevel@tonic-gate 			    as->a_userlimit);
507*7c478bd9Sstevel@tonic-gate 			if (result == RANGE_BADPROT) {
				/* Retry the range check without PROT_EXEC. */
508*7c478bd9Sstevel@tonic-gate 				prot &= ~PROT_EXEC;
509*7c478bd9Sstevel@tonic-gate 				result = valid_usr_range(base, len, prot, as,
510*7c478bd9Sstevel@tonic-gate 				    as->a_userlimit);
511*7c478bd9Sstevel@tonic-gate 			}
512*7c478bd9Sstevel@tonic-gate 			as_purge(as);
513*7c478bd9Sstevel@tonic-gate 			if (result != RANGE_OKAY ||
514*7c478bd9Sstevel@tonic-gate 			    as_gap(as, len, &base, &len,
515*7c478bd9Sstevel@tonic-gate 			    AH_LO, (caddr_t)NULL) != 0) {
516*7c478bd9Sstevel@tonic-gate 				error = EINVAL;
517*7c478bd9Sstevel@tonic-gate 				as_rangeunlock(as);
518*7c478bd9Sstevel@tonic-gate 				goto errret;
519*7c478bd9Sstevel@tonic-gate 			}
520*7c478bd9Sstevel@tonic-gate 		}
521*7c478bd9Sstevel@tonic-gate 
522*7c478bd9Sstevel@tonic-gate 		/* Initialize the create arguments and map the segment */
523*7c478bd9Sstevel@tonic-gate 		crargs = *(struct segvn_crargs *)zfod_argsp;
524*7c478bd9Sstevel@tonic-gate 		crargs.offset = 0;
525*7c478bd9Sstevel@tonic-gate 		crargs.type = MAP_SHARED;
526*7c478bd9Sstevel@tonic-gate 		crargs.amp = sp->shm_amp;
527*7c478bd9Sstevel@tonic-gate 		crargs.prot = prot;
528*7c478bd9Sstevel@tonic-gate 		crargs.maxprot = crargs.prot;
529*7c478bd9Sstevel@tonic-gate 		crargs.flags = 0;
530*7c478bd9Sstevel@tonic-gate 
531*7c478bd9Sstevel@tonic-gate 		error = as_map(as, addr, size, segvn_create, &crargs);
532*7c478bd9Sstevel@tonic-gate 	}
533*7c478bd9Sstevel@tonic-gate 
534*7c478bd9Sstevel@tonic-gate 	as_rangeunlock(as);
535*7c478bd9Sstevel@tonic-gate 	if (error)
536*7c478bd9Sstevel@tonic-gate 		goto errret;
537*7c478bd9Sstevel@tonic-gate 
538*7c478bd9Sstevel@tonic-gate 	/* record shmem range for the detach */
539*7c478bd9Sstevel@tonic-gate 	sa_add(pp, addr, (size_t)size, useISM ? SHMSA_ISM : 0, sp);
540*7c478bd9Sstevel@tonic-gate 	*rvp = (uintptr_t)addr;
541*7c478bd9Sstevel@tonic-gate 
	/*
	 * Update the attach time and last pid, and take a hold on the id
	 * via ipc_hold() for the new attachment.
	 */
542*7c478bd9Sstevel@tonic-gate 	sp->shm_atime = gethrestime_sec();
543*7c478bd9Sstevel@tonic-gate 	sp->shm_lpid = pp->p_pid;
544*7c478bd9Sstevel@tonic-gate 	ipc_hold(shm_svc, (kipc_perm_t *)sp);
545*7c478bd9Sstevel@tonic-gate errret:
	/* Single exit: drop the mutex obtained from ipc_lookup(). */
546*7c478bd9Sstevel@tonic-gate 	mutex_exit(lock);
547*7c478bd9Sstevel@tonic-gate 	return (error);
548*7c478bd9Sstevel@tonic-gate }
549*7c478bd9Sstevel@tonic-gate 
550*7c478bd9Sstevel@tonic-gate static void
551*7c478bd9Sstevel@tonic-gate shm_dtor(kipc_perm_t *perm)
552*7c478bd9Sstevel@tonic-gate {
553*7c478bd9Sstevel@tonic-gate 	kshmid_t *sp = (kshmid_t *)perm;
554*7c478bd9Sstevel@tonic-gate 	uint_t cnt;
555*7c478bd9Sstevel@tonic-gate 
556*7c478bd9Sstevel@tonic-gate 	if (sp->shm_sptinfo) {
557*7c478bd9Sstevel@tonic-gate 		if (isspt(sp))
558*7c478bd9Sstevel@tonic-gate 			sptdestroy(sp->shm_sptinfo->sptas, sp->shm_amp);
559*7c478bd9Sstevel@tonic-gate 		kmem_free(sp->shm_sptinfo, sizeof (sptinfo_t));
560*7c478bd9Sstevel@tonic-gate 	}
561*7c478bd9Sstevel@tonic-gate 
562*7c478bd9Sstevel@tonic-gate 	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
563*7c478bd9Sstevel@tonic-gate 	cnt = --sp->shm_amp->refcnt;
564*7c478bd9Sstevel@tonic-gate 	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
565*7c478bd9Sstevel@tonic-gate 	ASSERT(cnt == 0);
566*7c478bd9Sstevel@tonic-gate 	shm_rm_amp(sp->shm_amp, sp->shm_lkcnt);
567*7c478bd9Sstevel@tonic-gate 
568*7c478bd9Sstevel@tonic-gate 	if (sp->shm_perm.ipc_id != IPC_ID_INVAL) {
569*7c478bd9Sstevel@tonic-gate 		ipcs_lock(shm_svc);
570*7c478bd9Sstevel@tonic-gate 		sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax -=
571*7c478bd9Sstevel@tonic-gate 		    ptob(btopr(sp->shm_segsz));
572*7c478bd9Sstevel@tonic-gate 		ipcs_unlock(shm_svc);
573*7c478bd9Sstevel@tonic-gate 	}
574*7c478bd9Sstevel@tonic-gate }
575*7c478bd9Sstevel@tonic-gate 
576*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
577*7c478bd9Sstevel@tonic-gate static void
578*7c478bd9Sstevel@tonic-gate shm_rmid(kipc_perm_t *perm)
579*7c478bd9Sstevel@tonic-gate {
580*7c478bd9Sstevel@tonic-gate 	/* nothing to do */
581*7c478bd9Sstevel@tonic-gate }
582*7c478bd9Sstevel@tonic-gate 
583*7c478bd9Sstevel@tonic-gate /*
584*7c478bd9Sstevel@tonic-gate  * Shmctl system call.
585*7c478bd9Sstevel@tonic-gate  */
586*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
587*7c478bd9Sstevel@tonic-gate static int
588*7c478bd9Sstevel@tonic-gate shmctl(int shmid, int cmd, void *arg)
589*7c478bd9Sstevel@tonic-gate {
590*7c478bd9Sstevel@tonic-gate 	kshmid_t		*sp;	/* shared memory header ptr */
591*7c478bd9Sstevel@tonic-gate 	STRUCT_DECL(shmid_ds, ds);	/* for SVR4 IPC_SET */
592*7c478bd9Sstevel@tonic-gate 	int			error = 0;
593*7c478bd9Sstevel@tonic-gate 	struct cred 		*cr = CRED();
594*7c478bd9Sstevel@tonic-gate 	kmutex_t		*lock;
595*7c478bd9Sstevel@tonic-gate 	model_t			mdl = get_udatamodel();
596*7c478bd9Sstevel@tonic-gate 	struct shmid_ds64	ds64;
597*7c478bd9Sstevel@tonic-gate 	shmatt_t		nattch;
598*7c478bd9Sstevel@tonic-gate 
599*7c478bd9Sstevel@tonic-gate 	STRUCT_INIT(ds, mdl);
600*7c478bd9Sstevel@tonic-gate 
601*7c478bd9Sstevel@tonic-gate 	/*
602*7c478bd9Sstevel@tonic-gate 	 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
603*7c478bd9Sstevel@tonic-gate 	 */
604*7c478bd9Sstevel@tonic-gate 	switch (cmd) {
605*7c478bd9Sstevel@tonic-gate 	case IPC_SET:
606*7c478bd9Sstevel@tonic-gate 		if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds)))
607*7c478bd9Sstevel@tonic-gate 			return (EFAULT);
608*7c478bd9Sstevel@tonic-gate 		break;
609*7c478bd9Sstevel@tonic-gate 
610*7c478bd9Sstevel@tonic-gate 	case IPC_SET64:
611*7c478bd9Sstevel@tonic-gate 		if (copyin(arg, &ds64, sizeof (struct shmid_ds64)))
612*7c478bd9Sstevel@tonic-gate 			return (EFAULT);
613*7c478bd9Sstevel@tonic-gate 		break;
614*7c478bd9Sstevel@tonic-gate 
615*7c478bd9Sstevel@tonic-gate 	case IPC_RMID:
616*7c478bd9Sstevel@tonic-gate 		return (ipc_rmid(shm_svc, shmid, cr));
617*7c478bd9Sstevel@tonic-gate 	}
618*7c478bd9Sstevel@tonic-gate 
619*7c478bd9Sstevel@tonic-gate 	if ((lock = ipc_lookup(shm_svc, shmid, (kipc_perm_t **)&sp)) == NULL)
620*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
621*7c478bd9Sstevel@tonic-gate 
622*7c478bd9Sstevel@tonic-gate 	switch (cmd) {
623*7c478bd9Sstevel@tonic-gate 	/* Set ownership and permissions. */
624*7c478bd9Sstevel@tonic-gate 	case IPC_SET:
625*7c478bd9Sstevel@tonic-gate 		if (error = ipcperm_set(shm_svc, cr, &sp->shm_perm,
626*7c478bd9Sstevel@tonic-gate 		    &STRUCT_BUF(ds)->shm_perm, mdl))
627*7c478bd9Sstevel@tonic-gate 				break;
628*7c478bd9Sstevel@tonic-gate 		sp->shm_ctime = gethrestime_sec();
629*7c478bd9Sstevel@tonic-gate 		break;
630*7c478bd9Sstevel@tonic-gate 
631*7c478bd9Sstevel@tonic-gate 	case IPC_STAT:
632*7c478bd9Sstevel@tonic-gate 		if (error = ipcperm_access(&sp->shm_perm, SHM_R, cr))
633*7c478bd9Sstevel@tonic-gate 			break;
634*7c478bd9Sstevel@tonic-gate 
635*7c478bd9Sstevel@tonic-gate 		nattch = sp->shm_perm.ipc_ref - 1;
636*7c478bd9Sstevel@tonic-gate 
637*7c478bd9Sstevel@tonic-gate 		ipcperm_stat(&STRUCT_BUF(ds)->shm_perm, &sp->shm_perm, mdl);
638*7c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_segsz, sp->shm_segsz);
639*7c478bd9Sstevel@tonic-gate 		STRUCT_FSETP(ds, shm_amp, NULL);	/* kernel addr */
640*7c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_lkcnt, sp->shm_lkcnt);
641*7c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_lpid, sp->shm_lpid);
642*7c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_cpid, sp->shm_cpid);
643*7c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_nattch, nattch);
644*7c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_cnattch, sp->shm_ismattch);
645*7c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_atime, sp->shm_atime);
646*7c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_dtime, sp->shm_dtime);
647*7c478bd9Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_ctime, sp->shm_ctime);
648*7c478bd9Sstevel@tonic-gate 
649*7c478bd9Sstevel@tonic-gate 		mutex_exit(lock);
650*7c478bd9Sstevel@tonic-gate 		if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds)))
651*7c478bd9Sstevel@tonic-gate 			return (EFAULT);
652*7c478bd9Sstevel@tonic-gate 
653*7c478bd9Sstevel@tonic-gate 		return (0);
654*7c478bd9Sstevel@tonic-gate 
655*7c478bd9Sstevel@tonic-gate 	case IPC_SET64:
656*7c478bd9Sstevel@tonic-gate 		if (error = ipcperm_set64(shm_svc, cr,
657*7c478bd9Sstevel@tonic-gate 		    &sp->shm_perm, &ds64.shmx_perm))
658*7c478bd9Sstevel@tonic-gate 			break;
659*7c478bd9Sstevel@tonic-gate 		sp->shm_ctime = gethrestime_sec();
660*7c478bd9Sstevel@tonic-gate 		break;
661*7c478bd9Sstevel@tonic-gate 
662*7c478bd9Sstevel@tonic-gate 	case IPC_STAT64:
663*7c478bd9Sstevel@tonic-gate 		nattch = sp->shm_perm.ipc_ref - 1;
664*7c478bd9Sstevel@tonic-gate 
665*7c478bd9Sstevel@tonic-gate 		ipcperm_stat64(&ds64.shmx_perm, &sp->shm_perm);
666*7c478bd9Sstevel@tonic-gate 		ds64.shmx_segsz = sp->shm_segsz;
667*7c478bd9Sstevel@tonic-gate 		ds64.shmx_lkcnt = sp->shm_lkcnt;
668*7c478bd9Sstevel@tonic-gate 		ds64.shmx_lpid = sp->shm_lpid;
669*7c478bd9Sstevel@tonic-gate 		ds64.shmx_cpid = sp->shm_cpid;
670*7c478bd9Sstevel@tonic-gate 		ds64.shmx_nattch = nattch;
671*7c478bd9Sstevel@tonic-gate 		ds64.shmx_cnattch = sp->shm_ismattch;
672*7c478bd9Sstevel@tonic-gate 		ds64.shmx_atime = sp->shm_atime;
673*7c478bd9Sstevel@tonic-gate 		ds64.shmx_dtime = sp->shm_dtime;
674*7c478bd9Sstevel@tonic-gate 		ds64.shmx_ctime = sp->shm_ctime;
675*7c478bd9Sstevel@tonic-gate 
676*7c478bd9Sstevel@tonic-gate 		mutex_exit(lock);
677*7c478bd9Sstevel@tonic-gate 		if (copyout(&ds64, arg, sizeof (struct shmid_ds64)))
678*7c478bd9Sstevel@tonic-gate 			return (EFAULT);
679*7c478bd9Sstevel@tonic-gate 
680*7c478bd9Sstevel@tonic-gate 		return (0);
681*7c478bd9Sstevel@tonic-gate 
682*7c478bd9Sstevel@tonic-gate 	/* Lock segment in memory */
683*7c478bd9Sstevel@tonic-gate 	case SHM_LOCK:
684*7c478bd9Sstevel@tonic-gate 		if ((error = secpolicy_lock_memory(cr)) != 0)
685*7c478bd9Sstevel@tonic-gate 			break;
686*7c478bd9Sstevel@tonic-gate 
687*7c478bd9Sstevel@tonic-gate 		if (!isspt(sp) && (sp->shm_lkcnt++ == 0)) {
688*7c478bd9Sstevel@tonic-gate 			if (error = shmem_lock(sp->shm_amp)) {
689*7c478bd9Sstevel@tonic-gate 			    ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
690*7c478bd9Sstevel@tonic-gate 			    cmn_err(CE_NOTE,
691*7c478bd9Sstevel@tonic-gate 				"shmctl - couldn't lock %ld pages into memory",
692*7c478bd9Sstevel@tonic-gate 				sp->shm_amp->size);
693*7c478bd9Sstevel@tonic-gate 			    ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
694*7c478bd9Sstevel@tonic-gate 			    error = ENOMEM;
695*7c478bd9Sstevel@tonic-gate 			    sp->shm_lkcnt--;
696*7c478bd9Sstevel@tonic-gate 			    shmem_unlock(sp->shm_amp, 0);
697*7c478bd9Sstevel@tonic-gate 			}
698*7c478bd9Sstevel@tonic-gate 		}
699*7c478bd9Sstevel@tonic-gate 		break;
700*7c478bd9Sstevel@tonic-gate 
701*7c478bd9Sstevel@tonic-gate 	/* Unlock segment */
702*7c478bd9Sstevel@tonic-gate 	case SHM_UNLOCK:
703*7c478bd9Sstevel@tonic-gate 		if ((error = secpolicy_lock_memory(cr)) != 0)
704*7c478bd9Sstevel@tonic-gate 			break;
705*7c478bd9Sstevel@tonic-gate 
706*7c478bd9Sstevel@tonic-gate 		if (!isspt(sp)) {
707*7c478bd9Sstevel@tonic-gate 			if (sp->shm_lkcnt && (--sp->shm_lkcnt == 0)) {
708*7c478bd9Sstevel@tonic-gate 				shmem_unlock(sp->shm_amp, 1);
709*7c478bd9Sstevel@tonic-gate 			}
710*7c478bd9Sstevel@tonic-gate 		}
711*7c478bd9Sstevel@tonic-gate 		break;
712*7c478bd9Sstevel@tonic-gate 
713*7c478bd9Sstevel@tonic-gate 	default:
714*7c478bd9Sstevel@tonic-gate 		error = EINVAL;
715*7c478bd9Sstevel@tonic-gate 		break;
716*7c478bd9Sstevel@tonic-gate 	}
717*7c478bd9Sstevel@tonic-gate 	mutex_exit(lock);
718*7c478bd9Sstevel@tonic-gate 	return (error);
719*7c478bd9Sstevel@tonic-gate }
720*7c478bd9Sstevel@tonic-gate 
721*7c478bd9Sstevel@tonic-gate static void
722*7c478bd9Sstevel@tonic-gate shm_detach(proc_t *pp, segacct_t *sap)
723*7c478bd9Sstevel@tonic-gate {
724*7c478bd9Sstevel@tonic-gate 	kshmid_t	*sp = sap->sa_id;
725*7c478bd9Sstevel@tonic-gate 	size_t		len = sap->sa_len;
726*7c478bd9Sstevel@tonic-gate 	caddr_t		addr = sap->sa_addr;
727*7c478bd9Sstevel@tonic-gate 
728*7c478bd9Sstevel@tonic-gate 	/*
729*7c478bd9Sstevel@tonic-gate 	 * Discard lwpchan mappings.
730*7c478bd9Sstevel@tonic-gate 	 */
731*7c478bd9Sstevel@tonic-gate 	if (pp->p_lcp != NULL)
732*7c478bd9Sstevel@tonic-gate 		lwpchan_delete_mapping(pp, addr, addr + len);
733*7c478bd9Sstevel@tonic-gate 	(void) as_unmap(pp->p_as, addr, len);
734*7c478bd9Sstevel@tonic-gate 
735*7c478bd9Sstevel@tonic-gate 	/*
736*7c478bd9Sstevel@tonic-gate 	 * Perform some detach-time accounting.
737*7c478bd9Sstevel@tonic-gate 	 */
738*7c478bd9Sstevel@tonic-gate 	(void) ipc_lock(shm_svc, sp->shm_perm.ipc_id);
739*7c478bd9Sstevel@tonic-gate 	if (sap->sa_flags & SHMSA_ISM)
740*7c478bd9Sstevel@tonic-gate 		sp->shm_ismattch--;
741*7c478bd9Sstevel@tonic-gate 	sp->shm_dtime = gethrestime_sec();
742*7c478bd9Sstevel@tonic-gate 	sp->shm_lpid = pp->p_pid;
743*7c478bd9Sstevel@tonic-gate 	ipc_rele(shm_svc, (kipc_perm_t *)sp);	/* Drops lock */
744*7c478bd9Sstevel@tonic-gate 
745*7c478bd9Sstevel@tonic-gate 	kmem_free(sap, sizeof (segacct_t));
746*7c478bd9Sstevel@tonic-gate }
747*7c478bd9Sstevel@tonic-gate 
748*7c478bd9Sstevel@tonic-gate static int
749*7c478bd9Sstevel@tonic-gate shmdt(caddr_t addr)
750*7c478bd9Sstevel@tonic-gate {
751*7c478bd9Sstevel@tonic-gate 	proc_t *pp = curproc;
752*7c478bd9Sstevel@tonic-gate 	segacct_t *sap, template;
753*7c478bd9Sstevel@tonic-gate 
754*7c478bd9Sstevel@tonic-gate 	mutex_enter(&pp->p_lock);
755*7c478bd9Sstevel@tonic-gate 	prbarrier(pp);			/* block /proc.  See shmgetid(). */
756*7c478bd9Sstevel@tonic-gate 
757*7c478bd9Sstevel@tonic-gate 	template.sa_addr = addr;
758*7c478bd9Sstevel@tonic-gate 	template.sa_len = 0;
759*7c478bd9Sstevel@tonic-gate 	if ((pp->p_segacct == NULL) ||
760*7c478bd9Sstevel@tonic-gate 	    ((sap = avl_find(pp->p_segacct, &template, NULL)) == NULL)) {
761*7c478bd9Sstevel@tonic-gate 		mutex_exit(&pp->p_lock);
762*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
763*7c478bd9Sstevel@tonic-gate 	}
764*7c478bd9Sstevel@tonic-gate 	avl_remove(pp->p_segacct, sap);
765*7c478bd9Sstevel@tonic-gate 	mutex_exit(&pp->p_lock);
766*7c478bd9Sstevel@tonic-gate 
767*7c478bd9Sstevel@tonic-gate 	shm_detach(pp, sap);
768*7c478bd9Sstevel@tonic-gate 
769*7c478bd9Sstevel@tonic-gate 	return (0);
770*7c478bd9Sstevel@tonic-gate }
771*7c478bd9Sstevel@tonic-gate 
772*7c478bd9Sstevel@tonic-gate /*
773*7c478bd9Sstevel@tonic-gate  * Remove all shared memory segments associated with a given zone.
774*7c478bd9Sstevel@tonic-gate  * Called by zone_shutdown when the zone is halted.
775*7c478bd9Sstevel@tonic-gate  */
776*7c478bd9Sstevel@tonic-gate /*ARGSUSED1*/
777*7c478bd9Sstevel@tonic-gate static void
778*7c478bd9Sstevel@tonic-gate shm_remove_zone(zoneid_t zoneid, void *arg)
779*7c478bd9Sstevel@tonic-gate {
780*7c478bd9Sstevel@tonic-gate 	ipc_remove_zone(shm_svc, zoneid);
781*7c478bd9Sstevel@tonic-gate }
782*7c478bd9Sstevel@tonic-gate 
783*7c478bd9Sstevel@tonic-gate /*
784*7c478bd9Sstevel@tonic-gate  * Shmget (create new shmem) system call.
785*7c478bd9Sstevel@tonic-gate  */
786*7c478bd9Sstevel@tonic-gate static int
787*7c478bd9Sstevel@tonic-gate shmget(key_t key, size_t size, int shmflg, uintptr_t *rvp)
788*7c478bd9Sstevel@tonic-gate {
789*7c478bd9Sstevel@tonic-gate 	proc_t		*pp = curproc;
790*7c478bd9Sstevel@tonic-gate 	kshmid_t	*sp;
791*7c478bd9Sstevel@tonic-gate 	kmutex_t	*lock;
792*7c478bd9Sstevel@tonic-gate 	int		error;
793*7c478bd9Sstevel@tonic-gate 
794*7c478bd9Sstevel@tonic-gate top:
795*7c478bd9Sstevel@tonic-gate 	if (error = ipc_get(shm_svc, key, shmflg, (kipc_perm_t **)&sp, &lock))
796*7c478bd9Sstevel@tonic-gate 		return (error);
797*7c478bd9Sstevel@tonic-gate 
798*7c478bd9Sstevel@tonic-gate 	if (!IPC_FREE(&sp->shm_perm)) {
799*7c478bd9Sstevel@tonic-gate 		/*
800*7c478bd9Sstevel@tonic-gate 		 * A segment with the requested key exists.
801*7c478bd9Sstevel@tonic-gate 		 */
802*7c478bd9Sstevel@tonic-gate 		if (size > sp->shm_segsz) {
803*7c478bd9Sstevel@tonic-gate 			mutex_exit(lock);
804*7c478bd9Sstevel@tonic-gate 			return (EINVAL);
805*7c478bd9Sstevel@tonic-gate 		}
806*7c478bd9Sstevel@tonic-gate 	} else {
807*7c478bd9Sstevel@tonic-gate 		/*
808*7c478bd9Sstevel@tonic-gate 		 * A new segment should be created.
809*7c478bd9Sstevel@tonic-gate 		 */
810*7c478bd9Sstevel@tonic-gate 		size_t npages = btopr(size);
811*7c478bd9Sstevel@tonic-gate 		size_t rsize = ptob(npages);
812*7c478bd9Sstevel@tonic-gate 
813*7c478bd9Sstevel@tonic-gate 		/*
814*7c478bd9Sstevel@tonic-gate 		 * Check rsize and the per-project limit on shared
815*7c478bd9Sstevel@tonic-gate 		 * memory.  Checking rsize handles both the size == 0
816*7c478bd9Sstevel@tonic-gate 		 * case and the size < ULONG_MAX & PAGEMASK case (i.e.
817*7c478bd9Sstevel@tonic-gate 		 * rounding up wraps a size_t).
818*7c478bd9Sstevel@tonic-gate 		 */
819*7c478bd9Sstevel@tonic-gate 		if (rsize == 0 || (rctl_test(rc_project_shmmax,
820*7c478bd9Sstevel@tonic-gate 		    pp->p_task->tk_proj->kpj_rctls, pp, rsize,
821*7c478bd9Sstevel@tonic-gate 		    RCA_SAFE) & RCT_DENY)) {
822*7c478bd9Sstevel@tonic-gate 
823*7c478bd9Sstevel@tonic-gate 			mutex_exit(&pp->p_lock);
824*7c478bd9Sstevel@tonic-gate 			mutex_exit(lock);
825*7c478bd9Sstevel@tonic-gate 			ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
826*7c478bd9Sstevel@tonic-gate 			return (EINVAL);
827*7c478bd9Sstevel@tonic-gate 		}
828*7c478bd9Sstevel@tonic-gate 		mutex_exit(&pp->p_lock);
829*7c478bd9Sstevel@tonic-gate 		mutex_exit(lock);
830*7c478bd9Sstevel@tonic-gate 
831*7c478bd9Sstevel@tonic-gate 		if (anon_resv(rsize) == 0) {
832*7c478bd9Sstevel@tonic-gate 			ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
833*7c478bd9Sstevel@tonic-gate 			return (ENOMEM);
834*7c478bd9Sstevel@tonic-gate 		}
835*7c478bd9Sstevel@tonic-gate 
836*7c478bd9Sstevel@tonic-gate 		sp->shm_amp = anonmap_alloc(rsize, rsize);
837*7c478bd9Sstevel@tonic-gate 
838*7c478bd9Sstevel@tonic-gate 		/*
839*7c478bd9Sstevel@tonic-gate 		 * Store the original user's requested size, in bytes,
840*7c478bd9Sstevel@tonic-gate 		 * rather than the page-aligned size.  The former is
841*7c478bd9Sstevel@tonic-gate 		 * used for IPC_STAT and shmget() lookups.  The latter
842*7c478bd9Sstevel@tonic-gate 		 * is saved in the anon_map structure and is used for
843*7c478bd9Sstevel@tonic-gate 		 * calls to the vm layer.
844*7c478bd9Sstevel@tonic-gate 		 */
845*7c478bd9Sstevel@tonic-gate 		sp->shm_segsz = size;
846*7c478bd9Sstevel@tonic-gate 		sp->shm_atime = sp->shm_dtime = 0;
847*7c478bd9Sstevel@tonic-gate 		sp->shm_ctime = gethrestime_sec();
848*7c478bd9Sstevel@tonic-gate 		sp->shm_lpid = (pid_t)0;
849*7c478bd9Sstevel@tonic-gate 		sp->shm_cpid = curproc->p_pid;
850*7c478bd9Sstevel@tonic-gate 		sp->shm_ismattch = 0;
851*7c478bd9Sstevel@tonic-gate 		sp->shm_sptinfo = NULL;
852*7c478bd9Sstevel@tonic-gate 
853*7c478bd9Sstevel@tonic-gate 		/*
854*7c478bd9Sstevel@tonic-gate 		 * Check limits one last time, push id into global
855*7c478bd9Sstevel@tonic-gate 		 * visibility, and update resource usage counts.
856*7c478bd9Sstevel@tonic-gate 		 */
857*7c478bd9Sstevel@tonic-gate 		if (error = ipc_commit_begin(shm_svc, key, shmflg,
858*7c478bd9Sstevel@tonic-gate 		    (kipc_perm_t *)sp)) {
859*7c478bd9Sstevel@tonic-gate 			if (error == EAGAIN)
860*7c478bd9Sstevel@tonic-gate 				goto top;
861*7c478bd9Sstevel@tonic-gate 			return (error);
862*7c478bd9Sstevel@tonic-gate 		}
863*7c478bd9Sstevel@tonic-gate 
864*7c478bd9Sstevel@tonic-gate 		if (rctl_test(rc_project_shmmax,
865*7c478bd9Sstevel@tonic-gate 		    sp->shm_perm.ipc_proj->kpj_rctls, pp, rsize,
866*7c478bd9Sstevel@tonic-gate 		    RCA_SAFE) & RCT_DENY) {
867*7c478bd9Sstevel@tonic-gate 			ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
868*7c478bd9Sstevel@tonic-gate 			return (EINVAL);
869*7c478bd9Sstevel@tonic-gate 		}
870*7c478bd9Sstevel@tonic-gate 		sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax += rsize;
871*7c478bd9Sstevel@tonic-gate 
872*7c478bd9Sstevel@tonic-gate 		lock = ipc_commit_end(shm_svc, &sp->shm_perm);
873*7c478bd9Sstevel@tonic-gate 	}
874*7c478bd9Sstevel@tonic-gate 
875*7c478bd9Sstevel@tonic-gate #ifdef C2_AUDIT
876*7c478bd9Sstevel@tonic-gate 	if (audit_active)
877*7c478bd9Sstevel@tonic-gate 		audit_ipcget(AT_IPC_SHM, (void *)sp);
878*7c478bd9Sstevel@tonic-gate #endif
879*7c478bd9Sstevel@tonic-gate 
880*7c478bd9Sstevel@tonic-gate 	*rvp = (uintptr_t)(sp->shm_perm.ipc_id);
881*7c478bd9Sstevel@tonic-gate 
882*7c478bd9Sstevel@tonic-gate 	mutex_exit(lock);
883*7c478bd9Sstevel@tonic-gate 	return (0);
884*7c478bd9Sstevel@tonic-gate }
885*7c478bd9Sstevel@tonic-gate 
886*7c478bd9Sstevel@tonic-gate /*
887*7c478bd9Sstevel@tonic-gate  * shmids system call.
888*7c478bd9Sstevel@tonic-gate  */
889*7c478bd9Sstevel@tonic-gate static int
890*7c478bd9Sstevel@tonic-gate shmids(int *buf, uint_t nids, uint_t *pnids)
891*7c478bd9Sstevel@tonic-gate {
892*7c478bd9Sstevel@tonic-gate 	return (ipc_ids(shm_svc, buf, nids, pnids));
893*7c478bd9Sstevel@tonic-gate }
894*7c478bd9Sstevel@tonic-gate 
895*7c478bd9Sstevel@tonic-gate /*
896*7c478bd9Sstevel@tonic-gate  * System entry point for shmat, shmctl, shmdt, and shmget system calls.
897*7c478bd9Sstevel@tonic-gate  */
898*7c478bd9Sstevel@tonic-gate static uintptr_t
899*7c478bd9Sstevel@tonic-gate shmsys(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2)
900*7c478bd9Sstevel@tonic-gate {
901*7c478bd9Sstevel@tonic-gate 	int	error;
902*7c478bd9Sstevel@tonic-gate 	uintptr_t r_val = 0;
903*7c478bd9Sstevel@tonic-gate 
904*7c478bd9Sstevel@tonic-gate 	switch (opcode) {
905*7c478bd9Sstevel@tonic-gate 	case SHMAT:
906*7c478bd9Sstevel@tonic-gate 		error = shmat((int)a0, (caddr_t)a1, (int)a2, &r_val);
907*7c478bd9Sstevel@tonic-gate 		break;
908*7c478bd9Sstevel@tonic-gate 	case SHMCTL:
909*7c478bd9Sstevel@tonic-gate 		error = shmctl((int)a0, (int)a1, (void *)a2);
910*7c478bd9Sstevel@tonic-gate 		break;
911*7c478bd9Sstevel@tonic-gate 	case SHMDT:
912*7c478bd9Sstevel@tonic-gate 		error = shmdt((caddr_t)a0);
913*7c478bd9Sstevel@tonic-gate 		break;
914*7c478bd9Sstevel@tonic-gate 	case SHMGET:
915*7c478bd9Sstevel@tonic-gate 		error = shmget((key_t)a0, (size_t)a1, (int)a2, &r_val);
916*7c478bd9Sstevel@tonic-gate 		break;
917*7c478bd9Sstevel@tonic-gate 	case SHMIDS:
918*7c478bd9Sstevel@tonic-gate 		error = shmids((int *)a0, (uint_t)a1, (uint_t *)a2);
919*7c478bd9Sstevel@tonic-gate 		break;
920*7c478bd9Sstevel@tonic-gate 	default:
921*7c478bd9Sstevel@tonic-gate 		error = EINVAL;
922*7c478bd9Sstevel@tonic-gate 		break;
923*7c478bd9Sstevel@tonic-gate 	}
924*7c478bd9Sstevel@tonic-gate 
925*7c478bd9Sstevel@tonic-gate 	if (error)
926*7c478bd9Sstevel@tonic-gate 		return ((uintptr_t)set_errno(error));
927*7c478bd9Sstevel@tonic-gate 
928*7c478bd9Sstevel@tonic-gate 	return (r_val);
929*7c478bd9Sstevel@tonic-gate }
930*7c478bd9Sstevel@tonic-gate 
931*7c478bd9Sstevel@tonic-gate /*
932*7c478bd9Sstevel@tonic-gate  * segacct_t comparator
933*7c478bd9Sstevel@tonic-gate  * This works as expected, with one minor change: the first of two real
934*7c478bd9Sstevel@tonic-gate  * segments with equal addresses is considered to be 'greater than' the
935*7c478bd9Sstevel@tonic-gate  * second.  We only return equal when searching using a template, in
936*7c478bd9Sstevel@tonic-gate  * which case we explicitly set the template segment's length to 0
937*7c478bd9Sstevel@tonic-gate  * (which is invalid for a real segment).
938*7c478bd9Sstevel@tonic-gate  */
939*7c478bd9Sstevel@tonic-gate static int
940*7c478bd9Sstevel@tonic-gate shm_sacompar(const void *x, const void *y)
941*7c478bd9Sstevel@tonic-gate {
942*7c478bd9Sstevel@tonic-gate 	segacct_t *sa1 = (segacct_t *)x;
943*7c478bd9Sstevel@tonic-gate 	segacct_t *sa2 = (segacct_t *)y;
944*7c478bd9Sstevel@tonic-gate 
945*7c478bd9Sstevel@tonic-gate 	if (sa1->sa_addr < sa2->sa_addr)
946*7c478bd9Sstevel@tonic-gate 		return (-1);
947*7c478bd9Sstevel@tonic-gate 	if (sa1->sa_addr > sa2->sa_addr)
948*7c478bd9Sstevel@tonic-gate 		return (1);
949*7c478bd9Sstevel@tonic-gate 	if ((sa1->sa_len == 0) || (sa2->sa_len == 0))
950*7c478bd9Sstevel@tonic-gate 		return (0);
951*7c478bd9Sstevel@tonic-gate 	return (1);
952*7c478bd9Sstevel@tonic-gate }
953*7c478bd9Sstevel@tonic-gate 
954*7c478bd9Sstevel@tonic-gate /*
955*7c478bd9Sstevel@tonic-gate  * add this record to the segacct list.
956*7c478bd9Sstevel@tonic-gate  */
957*7c478bd9Sstevel@tonic-gate static void
958*7c478bd9Sstevel@tonic-gate sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags, kshmid_t *id)
959*7c478bd9Sstevel@tonic-gate {
960*7c478bd9Sstevel@tonic-gate 	segacct_t *nsap;
961*7c478bd9Sstevel@tonic-gate 	avl_tree_t *tree = NULL;
962*7c478bd9Sstevel@tonic-gate 	avl_index_t where;
963*7c478bd9Sstevel@tonic-gate 
964*7c478bd9Sstevel@tonic-gate 	nsap = kmem_alloc(sizeof (segacct_t), KM_SLEEP);
965*7c478bd9Sstevel@tonic-gate 	nsap->sa_addr = addr;
966*7c478bd9Sstevel@tonic-gate 	nsap->sa_len  = len;
967*7c478bd9Sstevel@tonic-gate 	nsap->sa_flags = flags;
968*7c478bd9Sstevel@tonic-gate 	nsap->sa_id = id;
969*7c478bd9Sstevel@tonic-gate 
970*7c478bd9Sstevel@tonic-gate 	if (pp->p_segacct == NULL)
971*7c478bd9Sstevel@tonic-gate 		tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
972*7c478bd9Sstevel@tonic-gate 
973*7c478bd9Sstevel@tonic-gate 	mutex_enter(&pp->p_lock);
974*7c478bd9Sstevel@tonic-gate 	prbarrier(pp);			/* block /proc.  See shmgetid(). */
975*7c478bd9Sstevel@tonic-gate 
976*7c478bd9Sstevel@tonic-gate 	if (pp->p_segacct == NULL) {
977*7c478bd9Sstevel@tonic-gate 		avl_create(tree, shm_sacompar, sizeof (segacct_t),
978*7c478bd9Sstevel@tonic-gate 		    offsetof(segacct_t, sa_tree));
979*7c478bd9Sstevel@tonic-gate 		pp->p_segacct = tree;
980*7c478bd9Sstevel@tonic-gate 	} else if (tree) {
981*7c478bd9Sstevel@tonic-gate 		kmem_free(tree, sizeof (avl_tree_t));
982*7c478bd9Sstevel@tonic-gate 	}
983*7c478bd9Sstevel@tonic-gate 
984*7c478bd9Sstevel@tonic-gate 	/*
985*7c478bd9Sstevel@tonic-gate 	 * We can ignore the result of avl_find, as the comparator will
986*7c478bd9Sstevel@tonic-gate 	 * never return equal for segments with non-zero length.  This
987*7c478bd9Sstevel@tonic-gate 	 * is a necessary hack to get around the fact that we do, in
988*7c478bd9Sstevel@tonic-gate 	 * fact, have duplicate keys.
989*7c478bd9Sstevel@tonic-gate 	 */
990*7c478bd9Sstevel@tonic-gate 	(void) avl_find(pp->p_segacct, nsap, &where);
991*7c478bd9Sstevel@tonic-gate 	avl_insert(pp->p_segacct, nsap, where);
992*7c478bd9Sstevel@tonic-gate 
993*7c478bd9Sstevel@tonic-gate 	mutex_exit(&pp->p_lock);
994*7c478bd9Sstevel@tonic-gate }
995*7c478bd9Sstevel@tonic-gate 
996*7c478bd9Sstevel@tonic-gate /*
997*7c478bd9Sstevel@tonic-gate  * Duplicate parent's segacct records in child.
998*7c478bd9Sstevel@tonic-gate  */
999*7c478bd9Sstevel@tonic-gate void
1000*7c478bd9Sstevel@tonic-gate shmfork(struct proc *ppp, struct proc *cpp)
1001*7c478bd9Sstevel@tonic-gate {
1002*7c478bd9Sstevel@tonic-gate 	segacct_t *sap;
1003*7c478bd9Sstevel@tonic-gate 	kshmid_t *sp;
1004*7c478bd9Sstevel@tonic-gate 	kmutex_t *mp;
1005*7c478bd9Sstevel@tonic-gate 
1006*7c478bd9Sstevel@tonic-gate 	ASSERT(ppp->p_segacct != NULL);
1007*7c478bd9Sstevel@tonic-gate 
1008*7c478bd9Sstevel@tonic-gate 	/*
1009*7c478bd9Sstevel@tonic-gate 	 * We are the only lwp running in the parent so nobody can
1010*7c478bd9Sstevel@tonic-gate 	 * mess with our p_segacct list.  Thus it is safe to traverse
1011*7c478bd9Sstevel@tonic-gate 	 * the list without holding p_lock.  This is essential because
1012*7c478bd9Sstevel@tonic-gate 	 * we can't hold p_lock during a KM_SLEEP allocation.
1013*7c478bd9Sstevel@tonic-gate 	 */
1014*7c478bd9Sstevel@tonic-gate 	for (sap = (segacct_t *)avl_first(ppp->p_segacct); sap != NULL;
1015*7c478bd9Sstevel@tonic-gate 	    sap = (segacct_t *)AVL_NEXT(ppp->p_segacct, sap)) {
1016*7c478bd9Sstevel@tonic-gate 		sa_add(cpp, sap->sa_addr, sap->sa_len, sap->sa_flags,
1017*7c478bd9Sstevel@tonic-gate 		    sap->sa_id);
1018*7c478bd9Sstevel@tonic-gate 		sp = sap->sa_id;
1019*7c478bd9Sstevel@tonic-gate 		mp = ipc_lock(shm_svc, sp->shm_perm.ipc_id);
1020*7c478bd9Sstevel@tonic-gate 		if (sap->sa_flags & SHMSA_ISM)
1021*7c478bd9Sstevel@tonic-gate 			sp->shm_ismattch++;
1022*7c478bd9Sstevel@tonic-gate 		ipc_hold(shm_svc, (kipc_perm_t *)sp);
1023*7c478bd9Sstevel@tonic-gate 		mutex_exit(mp);
1024*7c478bd9Sstevel@tonic-gate 	}
1025*7c478bd9Sstevel@tonic-gate }
1026*7c478bd9Sstevel@tonic-gate 
1027*7c478bd9Sstevel@tonic-gate /*
1028*7c478bd9Sstevel@tonic-gate  * Detach shared memory segments from exiting process.
1029*7c478bd9Sstevel@tonic-gate  */
1030*7c478bd9Sstevel@tonic-gate void
1031*7c478bd9Sstevel@tonic-gate shmexit(struct proc *pp)
1032*7c478bd9Sstevel@tonic-gate {
1033*7c478bd9Sstevel@tonic-gate 	segacct_t *sap;
1034*7c478bd9Sstevel@tonic-gate 	avl_tree_t *tree;
1035*7c478bd9Sstevel@tonic-gate 	void *cookie = NULL;
1036*7c478bd9Sstevel@tonic-gate 
1037*7c478bd9Sstevel@tonic-gate 	ASSERT(pp->p_segacct != NULL);
1038*7c478bd9Sstevel@tonic-gate 
1039*7c478bd9Sstevel@tonic-gate 	mutex_enter(&pp->p_lock);
1040*7c478bd9Sstevel@tonic-gate 	prbarrier(pp);
1041*7c478bd9Sstevel@tonic-gate 	tree = pp->p_segacct;
1042*7c478bd9Sstevel@tonic-gate 	pp->p_segacct = NULL;
1043*7c478bd9Sstevel@tonic-gate 	mutex_exit(&pp->p_lock);
1044*7c478bd9Sstevel@tonic-gate 
1045*7c478bd9Sstevel@tonic-gate 	while ((sap = avl_destroy_nodes(tree, &cookie)) != NULL)
1046*7c478bd9Sstevel@tonic-gate 		(void) shm_detach(pp, sap);
1047*7c478bd9Sstevel@tonic-gate 
1048*7c478bd9Sstevel@tonic-gate 	avl_destroy(tree);
1049*7c478bd9Sstevel@tonic-gate 	kmem_free(tree, sizeof (avl_tree_t));
1050*7c478bd9Sstevel@tonic-gate }
1051*7c478bd9Sstevel@tonic-gate 
1052*7c478bd9Sstevel@tonic-gate /*
1053*7c478bd9Sstevel@tonic-gate  * At this time pages should be in memory, so just lock them.
1054*7c478bd9Sstevel@tonic-gate  */
1055*7c478bd9Sstevel@tonic-gate static void
1056*7c478bd9Sstevel@tonic-gate lock_again(size_t npages, struct anon_map *amp)
1057*7c478bd9Sstevel@tonic-gate {
1058*7c478bd9Sstevel@tonic-gate 	struct anon *ap;
1059*7c478bd9Sstevel@tonic-gate 	struct page *pp;
1060*7c478bd9Sstevel@tonic-gate 	struct vnode *vp;
1061*7c478bd9Sstevel@tonic-gate 	anoff_t off;
1062*7c478bd9Sstevel@tonic-gate 	ulong_t anon_idx;
1063*7c478bd9Sstevel@tonic-gate 	anon_sync_obj_t cookie;
1064*7c478bd9Sstevel@tonic-gate 
1065*7c478bd9Sstevel@tonic-gate 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
1066*7c478bd9Sstevel@tonic-gate 
1067*7c478bd9Sstevel@tonic-gate 	for (anon_idx = 0; npages != 0; anon_idx++, npages--) {
1068*7c478bd9Sstevel@tonic-gate 
1069*7c478bd9Sstevel@tonic-gate 		anon_array_enter(amp, anon_idx, &cookie);
1070*7c478bd9Sstevel@tonic-gate 		ap = anon_get_ptr(amp->ahp, anon_idx);
1071*7c478bd9Sstevel@tonic-gate 		swap_xlate(ap, &vp, &off);
1072*7c478bd9Sstevel@tonic-gate 		anon_array_exit(&cookie);
1073*7c478bd9Sstevel@tonic-gate 
1074*7c478bd9Sstevel@tonic-gate 		pp = page_lookup(vp, (u_offset_t)off, SE_SHARED);
1075*7c478bd9Sstevel@tonic-gate 		if (pp == NULL) {
1076*7c478bd9Sstevel@tonic-gate 			panic("lock_again: page not in the system");
1077*7c478bd9Sstevel@tonic-gate 			/*NOTREACHED*/
1078*7c478bd9Sstevel@tonic-gate 		}
1079*7c478bd9Sstevel@tonic-gate 		(void) page_pp_lock(pp, 0, 0);
1080*7c478bd9Sstevel@tonic-gate 		page_unlock(pp);
1081*7c478bd9Sstevel@tonic-gate 	}
1082*7c478bd9Sstevel@tonic-gate 	ANON_LOCK_EXIT(&amp->a_rwlock);
1083*7c478bd9Sstevel@tonic-gate }
1084*7c478bd9Sstevel@tonic-gate 
1085*7c478bd9Sstevel@tonic-gate /* check if this segment is already locked. */
1086*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
1087*7c478bd9Sstevel@tonic-gate static int
1088*7c478bd9Sstevel@tonic-gate check_locked(struct as *as, struct segvn_data *svd, size_t npages)
1089*7c478bd9Sstevel@tonic-gate {
1090*7c478bd9Sstevel@tonic-gate 	struct vpage *vpp = svd->vpage;
1091*7c478bd9Sstevel@tonic-gate 	size_t i;
1092*7c478bd9Sstevel@tonic-gate 	if (svd->vpage == NULL)
1093*7c478bd9Sstevel@tonic-gate 		return (0);		/* unlocked */
1094*7c478bd9Sstevel@tonic-gate 
1095*7c478bd9Sstevel@tonic-gate 	SEGVN_LOCK_ENTER(as, &svd->lock, RW_READER);
1096*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < npages; i++, vpp++) {
1097*7c478bd9Sstevel@tonic-gate 		if (VPP_ISPPLOCK(vpp) == 0) {
1098*7c478bd9Sstevel@tonic-gate 			SEGVN_LOCK_EXIT(as, &svd->lock);
1099*7c478bd9Sstevel@tonic-gate 			return (1);	/* partially locked */
1100*7c478bd9Sstevel@tonic-gate 		}
1101*7c478bd9Sstevel@tonic-gate 	}
1102*7c478bd9Sstevel@tonic-gate 	SEGVN_LOCK_EXIT(as, &svd->lock);
1103*7c478bd9Sstevel@tonic-gate 	return (2);			/* locked */
1104*7c478bd9Sstevel@tonic-gate }
1105*7c478bd9Sstevel@tonic-gate 
1106*7c478bd9Sstevel@tonic-gate 
1107*7c478bd9Sstevel@tonic-gate /*
1108*7c478bd9Sstevel@tonic-gate  * Attach the shared memory segment to the process
1109*7c478bd9Sstevel@tonic-gate  * address space and lock the pages.
1110*7c478bd9Sstevel@tonic-gate  */
1111*7c478bd9Sstevel@tonic-gate static int
1112*7c478bd9Sstevel@tonic-gate shmem_lock(struct anon_map *amp)
1113*7c478bd9Sstevel@tonic-gate {
1114*7c478bd9Sstevel@tonic-gate 	size_t npages = btopr(amp->size);
1115*7c478bd9Sstevel@tonic-gate 	struct seg *seg;
1116*7c478bd9Sstevel@tonic-gate 	struct as *as;
1117*7c478bd9Sstevel@tonic-gate 	struct segvn_crargs crargs;
1118*7c478bd9Sstevel@tonic-gate 	struct segvn_data *svd;
1119*7c478bd9Sstevel@tonic-gate 	proc_t *p = curproc;
1120*7c478bd9Sstevel@tonic-gate 	caddr_t addr;
1121*7c478bd9Sstevel@tonic-gate 	uint_t error, ret;
1122*7c478bd9Sstevel@tonic-gate 	caddr_t seg_base;
1123*7c478bd9Sstevel@tonic-gate 	size_t  seg_sz;
1124*7c478bd9Sstevel@tonic-gate 
1125*7c478bd9Sstevel@tonic-gate 	as = p->p_as;
1126*7c478bd9Sstevel@tonic-gate 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1127*7c478bd9Sstevel@tonic-gate 	/* check if shared memory is already attached */
1128*7c478bd9Sstevel@tonic-gate 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1129*7c478bd9Sstevel@tonic-gate 		svd = (struct segvn_data *)seg->s_data;
1130*7c478bd9Sstevel@tonic-gate 		if ((seg->s_ops == &segvn_ops) && (svd->amp == amp) &&
1131*7c478bd9Sstevel@tonic-gate 		    (amp->size == seg->s_size)) {
1132*7c478bd9Sstevel@tonic-gate 			switch (ret = check_locked(as, svd, npages)) {
1133*7c478bd9Sstevel@tonic-gate 			case 0:			/* unlocked */
1134*7c478bd9Sstevel@tonic-gate 			case 1:			/* partially locked */
1135*7c478bd9Sstevel@tonic-gate 				seg_base = seg->s_base;
1136*7c478bd9Sstevel@tonic-gate 				seg_sz = seg->s_size;
1137*7c478bd9Sstevel@tonic-gate 
1138*7c478bd9Sstevel@tonic-gate 				AS_LOCK_EXIT(as, &as->a_lock);
1139*7c478bd9Sstevel@tonic-gate 				if ((error = as_ctl(as, seg_base, seg_sz,
1140*7c478bd9Sstevel@tonic-gate 					MC_LOCK, 0, 0, NULL, 0)) == 0)
1141*7c478bd9Sstevel@tonic-gate 					lock_again(npages, amp);
1142*7c478bd9Sstevel@tonic-gate 				(void) as_ctl(as, seg_base, seg_sz, MC_UNLOCK,
1143*7c478bd9Sstevel@tonic-gate 					0, 0, NULL, NULL);
1144*7c478bd9Sstevel@tonic-gate 				return (error);
1145*7c478bd9Sstevel@tonic-gate 			case 2:			/* locked */
1146*7c478bd9Sstevel@tonic-gate 				AS_LOCK_EXIT(as, &as->a_lock);
1147*7c478bd9Sstevel@tonic-gate 				lock_again(npages, amp);
1148*7c478bd9Sstevel@tonic-gate 				return (0);
1149*7c478bd9Sstevel@tonic-gate 			default:
1150*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_WARN, "shmem_lock: deflt %d", ret);
1151*7c478bd9Sstevel@tonic-gate 				break;
1152*7c478bd9Sstevel@tonic-gate 			}
1153*7c478bd9Sstevel@tonic-gate 		}
1154*7c478bd9Sstevel@tonic-gate 	}
1155*7c478bd9Sstevel@tonic-gate 	AS_LOCK_EXIT(as, &as->a_lock);
1156*7c478bd9Sstevel@tonic-gate 
1157*7c478bd9Sstevel@tonic-gate 	/* attach shm segment to our address space */
1158*7c478bd9Sstevel@tonic-gate 	as_rangelock(as);
1159*7c478bd9Sstevel@tonic-gate 	map_addr(&addr, amp->size, 0ll, 1, 0);
1160*7c478bd9Sstevel@tonic-gate 	if (addr == NULL) {
1161*7c478bd9Sstevel@tonic-gate 		as_rangeunlock(as);
1162*7c478bd9Sstevel@tonic-gate 		return (ENOMEM);
1163*7c478bd9Sstevel@tonic-gate 	}
1164*7c478bd9Sstevel@tonic-gate 
1165*7c478bd9Sstevel@tonic-gate 	/* Initialize the create arguments and map the segment */
1166*7c478bd9Sstevel@tonic-gate 	crargs = *(struct segvn_crargs *)zfod_argsp;	/* structure copy */
1167*7c478bd9Sstevel@tonic-gate 	crargs.offset = (u_offset_t)0;
1168*7c478bd9Sstevel@tonic-gate 	crargs.type = MAP_SHARED;
1169*7c478bd9Sstevel@tonic-gate 	crargs.amp = amp;
1170*7c478bd9Sstevel@tonic-gate 	crargs.prot = PROT_ALL;
1171*7c478bd9Sstevel@tonic-gate 	crargs.maxprot = crargs.prot;
1172*7c478bd9Sstevel@tonic-gate 	crargs.flags = 0;
1173*7c478bd9Sstevel@tonic-gate 
1174*7c478bd9Sstevel@tonic-gate 	error = as_map(as, addr, amp->size, segvn_create, &crargs);
1175*7c478bd9Sstevel@tonic-gate 	as_rangeunlock(as);
1176*7c478bd9Sstevel@tonic-gate 	if (!error) {
1177*7c478bd9Sstevel@tonic-gate 		if ((error = as_ctl(as, addr, amp->size, MC_LOCK, 0, 0,
1178*7c478bd9Sstevel@tonic-gate 			NULL, 0)) == 0) {
1179*7c478bd9Sstevel@tonic-gate 			lock_again(npages, amp);
1180*7c478bd9Sstevel@tonic-gate 		}
1181*7c478bd9Sstevel@tonic-gate 		(void) as_unmap(as, addr, amp->size);
1182*7c478bd9Sstevel@tonic-gate 	}
1183*7c478bd9Sstevel@tonic-gate 	return (error);
1184*7c478bd9Sstevel@tonic-gate }
1185*7c478bd9Sstevel@tonic-gate 
1186*7c478bd9Sstevel@tonic-gate 
1187*7c478bd9Sstevel@tonic-gate /*
1188*7c478bd9Sstevel@tonic-gate  * Unlock shared memory
1189*7c478bd9Sstevel@tonic-gate  */
1190*7c478bd9Sstevel@tonic-gate static void
1191*7c478bd9Sstevel@tonic-gate shmem_unlock(struct anon_map *amp, uint_t lck)
1192*7c478bd9Sstevel@tonic-gate {
1193*7c478bd9Sstevel@tonic-gate 	struct anon *ap;
1194*7c478bd9Sstevel@tonic-gate 	pgcnt_t npages = btopr(amp->size);
1195*7c478bd9Sstevel@tonic-gate 	struct vnode *vp;
1196*7c478bd9Sstevel@tonic-gate 	struct page *pp;
1197*7c478bd9Sstevel@tonic-gate 	anoff_t off;
1198*7c478bd9Sstevel@tonic-gate 	ulong_t anon_idx;
1199*7c478bd9Sstevel@tonic-gate 
1200*7c478bd9Sstevel@tonic-gate 	for (anon_idx = 0; anon_idx < npages; anon_idx++) {
1201*7c478bd9Sstevel@tonic-gate 
1202*7c478bd9Sstevel@tonic-gate 		if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
1203*7c478bd9Sstevel@tonic-gate 			if (lck) {
1204*7c478bd9Sstevel@tonic-gate 				panic("shmem_unlock: null app");
1205*7c478bd9Sstevel@tonic-gate 				/*NOTREACHED*/
1206*7c478bd9Sstevel@tonic-gate 			}
1207*7c478bd9Sstevel@tonic-gate 			continue;
1208*7c478bd9Sstevel@tonic-gate 		}
1209*7c478bd9Sstevel@tonic-gate 		swap_xlate(ap, &vp, &off);
1210*7c478bd9Sstevel@tonic-gate 		pp = page_lookup(vp, off, SE_SHARED);
1211*7c478bd9Sstevel@tonic-gate 		if (pp == NULL) {
1212*7c478bd9Sstevel@tonic-gate 			if (lck) {
1213*7c478bd9Sstevel@tonic-gate 				panic("shmem_unlock: page not in the system");
1214*7c478bd9Sstevel@tonic-gate 				/*NOTREACHED*/
1215*7c478bd9Sstevel@tonic-gate 			}
1216*7c478bd9Sstevel@tonic-gate 			continue;
1217*7c478bd9Sstevel@tonic-gate 		}
1218*7c478bd9Sstevel@tonic-gate 		if (pp->p_lckcnt) {
1219*7c478bd9Sstevel@tonic-gate 			page_pp_unlock(pp, 0, 0);
1220*7c478bd9Sstevel@tonic-gate 		}
1221*7c478bd9Sstevel@tonic-gate 		page_unlock(pp);
1222*7c478bd9Sstevel@tonic-gate 	}
1223*7c478bd9Sstevel@tonic-gate }
1224*7c478bd9Sstevel@tonic-gate 
1225*7c478bd9Sstevel@tonic-gate /*
1226*7c478bd9Sstevel@tonic-gate  * We call this routine when we have removed all references to this
1227*7c478bd9Sstevel@tonic-gate  * amp.  This means all shmdt()s and the IPC_RMID have been done.
1228*7c478bd9Sstevel@tonic-gate  */
1229*7c478bd9Sstevel@tonic-gate static void
1230*7c478bd9Sstevel@tonic-gate shm_rm_amp(struct anon_map *amp, uint_t lckflag)
1231*7c478bd9Sstevel@tonic-gate {
1232*7c478bd9Sstevel@tonic-gate 	/*
1233*7c478bd9Sstevel@tonic-gate 	 * If we are finally deleting the
1234*7c478bd9Sstevel@tonic-gate 	 * shared memory, and if no one did
1235*7c478bd9Sstevel@tonic-gate 	 * the SHM_UNLOCK, we must do it now.
1236*7c478bd9Sstevel@tonic-gate 	 */
1237*7c478bd9Sstevel@tonic-gate 	shmem_unlock(amp, lckflag);
1238*7c478bd9Sstevel@tonic-gate 
1239*7c478bd9Sstevel@tonic-gate 	/*
1240*7c478bd9Sstevel@tonic-gate 	 * Free up the anon_map.
1241*7c478bd9Sstevel@tonic-gate 	 */
1242*7c478bd9Sstevel@tonic-gate 	lgrp_shm_policy_fini(amp, NULL);
1243*7c478bd9Sstevel@tonic-gate 	anon_free(amp->ahp, 0, amp->size);
1244*7c478bd9Sstevel@tonic-gate 	anon_unresv(amp->swresv);
1245*7c478bd9Sstevel@tonic-gate 	anonmap_free(amp);
1246*7c478bd9Sstevel@tonic-gate }
1247*7c478bd9Sstevel@tonic-gate 
1248*7c478bd9Sstevel@tonic-gate /*
1249*7c478bd9Sstevel@tonic-gate  * Return the shared memory id for the process's virtual address.
1250*7c478bd9Sstevel@tonic-gate  * Return SHMID_NONE if addr is not within a SysV shared memory segment.
1251*7c478bd9Sstevel@tonic-gate  * Return SHMID_FREE if addr's SysV shared memory segment's id has been freed.
1252*7c478bd9Sstevel@tonic-gate  *
1253*7c478bd9Sstevel@tonic-gate  * shmgetid() is called from code in /proc with the process locked but
1254*7c478bd9Sstevel@tonic-gate  * with pp->p_lock not held.  The address space lock is held, so we
1255*7c478bd9Sstevel@tonic-gate  * cannot grab pp->p_lock here due to lock-ordering constraints.
1256*7c478bd9Sstevel@tonic-gate  * Because of all this, modifications to the p_segacct list must only
1257*7c478bd9Sstevel@tonic-gate  * be made after calling prbarrier() to ensure the process is not locked.
1258*7c478bd9Sstevel@tonic-gate  * See shmdt() and sa_add(), above. shmgetid() may also be called on a
1259*7c478bd9Sstevel@tonic-gate  * thread's own process without the process locked.
1260*7c478bd9Sstevel@tonic-gate  */
1261*7c478bd9Sstevel@tonic-gate int
1262*7c478bd9Sstevel@tonic-gate shmgetid(proc_t *pp, caddr_t addr)
1263*7c478bd9Sstevel@tonic-gate {
1264*7c478bd9Sstevel@tonic-gate 	segacct_t *sap, template;
1265*7c478bd9Sstevel@tonic-gate 
1266*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_NOT_HELD(&pp->p_lock));
1267*7c478bd9Sstevel@tonic-gate 	ASSERT((pp->p_proc_flag & P_PR_LOCK) || pp == curproc);
1268*7c478bd9Sstevel@tonic-gate 
1269*7c478bd9Sstevel@tonic-gate 	if (pp->p_segacct == NULL)
1270*7c478bd9Sstevel@tonic-gate 		return (SHMID_NONE);
1271*7c478bd9Sstevel@tonic-gate 
1272*7c478bd9Sstevel@tonic-gate 	template.sa_addr = addr;
1273*7c478bd9Sstevel@tonic-gate 	template.sa_len = 0;
1274*7c478bd9Sstevel@tonic-gate 	if ((sap = avl_find(pp->p_segacct, &template, NULL)) == NULL)
1275*7c478bd9Sstevel@tonic-gate 		return (SHMID_NONE);
1276*7c478bd9Sstevel@tonic-gate 
1277*7c478bd9Sstevel@tonic-gate 	if (IPC_FREE(&sap->sa_id->shm_perm))
1278*7c478bd9Sstevel@tonic-gate 		return (SHMID_FREE);
1279*7c478bd9Sstevel@tonic-gate 
1280*7c478bd9Sstevel@tonic-gate 	return (sap->sa_id->shm_perm.ipc_id);
1281*7c478bd9Sstevel@tonic-gate }
1282