xref: /titanic_54/usr/src/uts/common/os/schedctl.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*7c478bd9Sstevel@tonic-gate 
29*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
30*7c478bd9Sstevel@tonic-gate #include <sys/systm.h>
31*7c478bd9Sstevel@tonic-gate #include <sys/schedctl.h>
32*7c478bd9Sstevel@tonic-gate #include <sys/proc.h>
33*7c478bd9Sstevel@tonic-gate #include <sys/thread.h>
34*7c478bd9Sstevel@tonic-gate #include <sys/class.h>
35*7c478bd9Sstevel@tonic-gate #include <sys/cred.h>
36*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
37*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
38*7c478bd9Sstevel@tonic-gate #include <sys/stack.h>
39*7c478bd9Sstevel@tonic-gate #include <sys/debug.h>
40*7c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
41*7c478bd9Sstevel@tonic-gate #include <sys/sobject.h>
42*7c478bd9Sstevel@tonic-gate #include <sys/door.h>
43*7c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
44*7c478bd9Sstevel@tonic-gate #include <sys/syscall.h>
45*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
46*7c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h>
47*7c478bd9Sstevel@tonic-gate #include <sys/mman.h>
48*7c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
49*7c478bd9Sstevel@tonic-gate #include <sys/swap.h>
50*7c478bd9Sstevel@tonic-gate #include <sys/lwp.h>
51*7c478bd9Sstevel@tonic-gate #include <sys/bitmap.h>
52*7c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
53*7c478bd9Sstevel@tonic-gate #include <sys/fcntl.h>
54*7c478bd9Sstevel@tonic-gate #include <vm/seg_kp.h>
55*7c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h>
56*7c478bd9Sstevel@tonic-gate #include <vm/as.h>
57*7c478bd9Sstevel@tonic-gate #include <fs/fs_subr.h>
58*7c478bd9Sstevel@tonic-gate 
59*7c478bd9Sstevel@tonic-gate 
60*7c478bd9Sstevel@tonic-gate /*
61*7c478bd9Sstevel@tonic-gate  * Page handling structures.  This is set up as a list of per-page
62*7c478bd9Sstevel@tonic-gate  * control structures (sc_page_ctl), with p->p_pagep pointing to
63*7c478bd9Sstevel@tonic-gate  * the first.  The per-page structures point to the actual pages
64*7c478bd9Sstevel@tonic-gate  * and contain pointers to the user address for each mapped page.
65*7c478bd9Sstevel@tonic-gate  *
66*7c478bd9Sstevel@tonic-gate  * All data is protected by p->p_sc_lock.  Since this lock is
67*7c478bd9Sstevel@tonic-gate  * held while waiting for memory, schedctl_shared_alloc() should
68*7c478bd9Sstevel@tonic-gate  * not be called while holding p_lock.
69*7c478bd9Sstevel@tonic-gate  */
70*7c478bd9Sstevel@tonic-gate 
71*7c478bd9Sstevel@tonic-gate typedef struct sc_page_ctl {
72*7c478bd9Sstevel@tonic-gate 	struct sc_page_ctl *spc_next;
73*7c478bd9Sstevel@tonic-gate 	sc_shared_t	*spc_base;	/* base of kernel page */
74*7c478bd9Sstevel@tonic-gate 	sc_shared_t	*spc_end;	/* end of usable space */
75*7c478bd9Sstevel@tonic-gate 	ulong_t		*spc_map;	/* bitmap of allocated space on page */
76*7c478bd9Sstevel@tonic-gate 	size_t		spc_space;	/* amount of space on page */
77*7c478bd9Sstevel@tonic-gate 	caddr_t		spc_uaddr;	/* user-level address of the page */
78*7c478bd9Sstevel@tonic-gate 	struct anon_map	*spc_amp;	/* anonymous memory structure */
79*7c478bd9Sstevel@tonic-gate } sc_page_ctl_t;
80*7c478bd9Sstevel@tonic-gate 
81*7c478bd9Sstevel@tonic-gate static size_t	sc_pagesize;		/* size of usable space on page */
82*7c478bd9Sstevel@tonic-gate static size_t	sc_bitmap_len;		/* # of bits in allocation bitmap */
83*7c478bd9Sstevel@tonic-gate static size_t	sc_bitmap_words;	/* # of words in allocation bitmap */
84*7c478bd9Sstevel@tonic-gate 
85*7c478bd9Sstevel@tonic-gate /* Context ops */
86*7c478bd9Sstevel@tonic-gate static void	schedctl_save(sc_shared_t *);
87*7c478bd9Sstevel@tonic-gate static void	schedctl_restore(sc_shared_t *);
88*7c478bd9Sstevel@tonic-gate static void	schedctl_fork(kthread_t *, kthread_t *);
89*7c478bd9Sstevel@tonic-gate 
90*7c478bd9Sstevel@tonic-gate /* Functions for handling shared pages */
91*7c478bd9Sstevel@tonic-gate static int	schedctl_shared_alloc(sc_shared_t **, uintptr_t *);
92*7c478bd9Sstevel@tonic-gate static sc_page_ctl_t *schedctl_page_lookup(sc_shared_t *);
93*7c478bd9Sstevel@tonic-gate static int	schedctl_map(struct anon_map *, caddr_t *, caddr_t);
94*7c478bd9Sstevel@tonic-gate static int	schedctl_getpage(struct anon_map **, caddr_t *);
95*7c478bd9Sstevel@tonic-gate static void	schedctl_freepage(struct anon_map *, caddr_t);
96*7c478bd9Sstevel@tonic-gate 
97*7c478bd9Sstevel@tonic-gate /*
98*7c478bd9Sstevel@tonic-gate  * System call interface to scheduler activations.
99*7c478bd9Sstevel@tonic-gate  * This always operates on the current lwp.
100*7c478bd9Sstevel@tonic-gate  */
101*7c478bd9Sstevel@tonic-gate caddr_t
102*7c478bd9Sstevel@tonic-gate schedctl(void)
103*7c478bd9Sstevel@tonic-gate {
104*7c478bd9Sstevel@tonic-gate 	kthread_t	*t = curthread;
105*7c478bd9Sstevel@tonic-gate 	sc_shared_t	*ssp;
106*7c478bd9Sstevel@tonic-gate 	uintptr_t	uaddr;
107*7c478bd9Sstevel@tonic-gate 	int		error;
108*7c478bd9Sstevel@tonic-gate 
109*7c478bd9Sstevel@tonic-gate 	if (t->t_schedctl == NULL) {
110*7c478bd9Sstevel@tonic-gate 		/*
111*7c478bd9Sstevel@tonic-gate 		 * Allocate and initialize the shared structure.
112*7c478bd9Sstevel@tonic-gate 		 */
113*7c478bd9Sstevel@tonic-gate 		if ((error = schedctl_shared_alloc(&ssp, &uaddr)) != 0)
114*7c478bd9Sstevel@tonic-gate 			return ((caddr_t)(uintptr_t)set_errno(error));
115*7c478bd9Sstevel@tonic-gate 		bzero(ssp, sizeof (*ssp));
116*7c478bd9Sstevel@tonic-gate 
117*7c478bd9Sstevel@tonic-gate 		installctx(t, ssp, schedctl_save, schedctl_restore,
118*7c478bd9Sstevel@tonic-gate 		    schedctl_fork, NULL, NULL, NULL);
119*7c478bd9Sstevel@tonic-gate 
120*7c478bd9Sstevel@tonic-gate 		thread_lock(t);	/* protect against ts_tick and ts_update */
121*7c478bd9Sstevel@tonic-gate 		t->t_schedctl = ssp;
122*7c478bd9Sstevel@tonic-gate 		t->t_sc_uaddr = uaddr;
123*7c478bd9Sstevel@tonic-gate 		thread_unlock(t);
124*7c478bd9Sstevel@tonic-gate 	}
125*7c478bd9Sstevel@tonic-gate 
126*7c478bd9Sstevel@tonic-gate 	return ((caddr_t)t->t_sc_uaddr);
127*7c478bd9Sstevel@tonic-gate }
128*7c478bd9Sstevel@tonic-gate 
129*7c478bd9Sstevel@tonic-gate 
130*7c478bd9Sstevel@tonic-gate /*
131*7c478bd9Sstevel@tonic-gate  * Clean up scheduler activations state associated with an exiting
132*7c478bd9Sstevel@tonic-gate  * (or execing) lwp.  t is always the current thread.
133*7c478bd9Sstevel@tonic-gate  */
134*7c478bd9Sstevel@tonic-gate void
135*7c478bd9Sstevel@tonic-gate schedctl_lwp_cleanup(kthread_t *t)
136*7c478bd9Sstevel@tonic-gate {
137*7c478bd9Sstevel@tonic-gate 	sc_shared_t	*ssp = t->t_schedctl;
138*7c478bd9Sstevel@tonic-gate 	proc_t		*p = ttoproc(t);
139*7c478bd9Sstevel@tonic-gate 	sc_page_ctl_t	*pagep;
140*7c478bd9Sstevel@tonic-gate 	index_t		index;
141*7c478bd9Sstevel@tonic-gate 
142*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
143*7c478bd9Sstevel@tonic-gate 
144*7c478bd9Sstevel@tonic-gate 	thread_lock(t);		/* protect against ts_tick and ts_update */
145*7c478bd9Sstevel@tonic-gate 	t->t_schedctl = NULL;
146*7c478bd9Sstevel@tonic-gate 	t->t_sc_uaddr = 0;
147*7c478bd9Sstevel@tonic-gate 	thread_unlock(t);
148*7c478bd9Sstevel@tonic-gate 
149*7c478bd9Sstevel@tonic-gate 	/*
150*7c478bd9Sstevel@tonic-gate 	 * Remove the context op to avoid the final call to
151*7c478bd9Sstevel@tonic-gate 	 * schedctl_save when switching away from this lwp.
152*7c478bd9Sstevel@tonic-gate 	 */
153*7c478bd9Sstevel@tonic-gate 	(void) removectx(t, ssp, schedctl_save, schedctl_restore,
154*7c478bd9Sstevel@tonic-gate 	    schedctl_fork, NULL, NULL, NULL);
155*7c478bd9Sstevel@tonic-gate 
156*7c478bd9Sstevel@tonic-gate 	/*
157*7c478bd9Sstevel@tonic-gate 	 * Do not unmap the shared page until the process exits.
158*7c478bd9Sstevel@tonic-gate 	 * User-level library code relies on this for adaptive mutex locking.
159*7c478bd9Sstevel@tonic-gate 	 */
160*7c478bd9Sstevel@tonic-gate 	mutex_enter(&p->p_sc_lock);
161*7c478bd9Sstevel@tonic-gate 	ssp->sc_state = SC_FREE;
162*7c478bd9Sstevel@tonic-gate 	pagep = schedctl_page_lookup(ssp);
163*7c478bd9Sstevel@tonic-gate 	index = (index_t)(ssp - pagep->spc_base);
164*7c478bd9Sstevel@tonic-gate 	BT_CLEAR(pagep->spc_map, index);
165*7c478bd9Sstevel@tonic-gate 	pagep->spc_space += sizeof (sc_shared_t);
166*7c478bd9Sstevel@tonic-gate 	mutex_exit(&p->p_sc_lock);
167*7c478bd9Sstevel@tonic-gate }
168*7c478bd9Sstevel@tonic-gate 
169*7c478bd9Sstevel@tonic-gate /*
170*7c478bd9Sstevel@tonic-gate  * Cleanup the list of schedctl shared pages for the process.
171*7c478bd9Sstevel@tonic-gate  * Called from exec() and exit() system calls.
172*7c478bd9Sstevel@tonic-gate  */
173*7c478bd9Sstevel@tonic-gate void
174*7c478bd9Sstevel@tonic-gate schedctl_proc_cleanup()
175*7c478bd9Sstevel@tonic-gate {
176*7c478bd9Sstevel@tonic-gate 	proc_t *p = curproc;
177*7c478bd9Sstevel@tonic-gate 	sc_page_ctl_t *pagep;
178*7c478bd9Sstevel@tonic-gate 	sc_page_ctl_t *next;
179*7c478bd9Sstevel@tonic-gate 
180*7c478bd9Sstevel@tonic-gate 	ASSERT(p->p_lwpcnt == 1);	/* we are single-threaded now */
181*7c478bd9Sstevel@tonic-gate 	ASSERT(curthread->t_schedctl == NULL);
182*7c478bd9Sstevel@tonic-gate 
183*7c478bd9Sstevel@tonic-gate 	/*
184*7c478bd9Sstevel@tonic-gate 	 * Since we are single-threaded, we don't have to hold p->p_sc_lock.
185*7c478bd9Sstevel@tonic-gate 	 */
186*7c478bd9Sstevel@tonic-gate 	pagep = p->p_pagep;
187*7c478bd9Sstevel@tonic-gate 	p->p_pagep = NULL;
188*7c478bd9Sstevel@tonic-gate 	while (pagep != NULL) {
189*7c478bd9Sstevel@tonic-gate 		ASSERT(pagep->spc_space == sc_pagesize);
190*7c478bd9Sstevel@tonic-gate 		next = pagep->spc_next;
191*7c478bd9Sstevel@tonic-gate 		/*
192*7c478bd9Sstevel@tonic-gate 		 * Unmap the user space and free the mapping structure.
193*7c478bd9Sstevel@tonic-gate 		 */
194*7c478bd9Sstevel@tonic-gate 		(void) as_unmap(p->p_as, pagep->spc_uaddr, PAGESIZE);
195*7c478bd9Sstevel@tonic-gate 		schedctl_freepage(pagep->spc_amp, (caddr_t)(pagep->spc_base));
196*7c478bd9Sstevel@tonic-gate 		kmem_free(pagep->spc_map, sizeof (ulong_t) * sc_bitmap_words);
197*7c478bd9Sstevel@tonic-gate 		kmem_free(pagep, sizeof (sc_page_ctl_t));
198*7c478bd9Sstevel@tonic-gate 		pagep = next;
199*7c478bd9Sstevel@tonic-gate 	}
200*7c478bd9Sstevel@tonic-gate }
201*7c478bd9Sstevel@tonic-gate 
202*7c478bd9Sstevel@tonic-gate /*
203*7c478bd9Sstevel@tonic-gate  * Called by resume just before switching away from the current thread.
204*7c478bd9Sstevel@tonic-gate  * Save new thread state.
205*7c478bd9Sstevel@tonic-gate  */
206*7c478bd9Sstevel@tonic-gate void
207*7c478bd9Sstevel@tonic-gate schedctl_save(sc_shared_t *ssp)
208*7c478bd9Sstevel@tonic-gate {
209*7c478bd9Sstevel@tonic-gate 	ssp->sc_state = curthread->t_state;
210*7c478bd9Sstevel@tonic-gate }
211*7c478bd9Sstevel@tonic-gate 
212*7c478bd9Sstevel@tonic-gate 
213*7c478bd9Sstevel@tonic-gate /*
214*7c478bd9Sstevel@tonic-gate  * Called by resume after switching to the current thread.
215*7c478bd9Sstevel@tonic-gate  * Save new thread state and CPU.
216*7c478bd9Sstevel@tonic-gate  */
217*7c478bd9Sstevel@tonic-gate void
218*7c478bd9Sstevel@tonic-gate schedctl_restore(sc_shared_t *ssp)
219*7c478bd9Sstevel@tonic-gate {
220*7c478bd9Sstevel@tonic-gate 	ssp->sc_state = SC_ONPROC;
221*7c478bd9Sstevel@tonic-gate 	ssp->sc_cpu = CPU->cpu_id;
222*7c478bd9Sstevel@tonic-gate }
223*7c478bd9Sstevel@tonic-gate 
224*7c478bd9Sstevel@tonic-gate 
225*7c478bd9Sstevel@tonic-gate /*
226*7c478bd9Sstevel@tonic-gate  * On fork, remove inherited mappings from the child's address space.
227*7c478bd9Sstevel@tonic-gate  * The child's threads must call schedctl() to get new shared mappings.
228*7c478bd9Sstevel@tonic-gate  */
229*7c478bd9Sstevel@tonic-gate void
230*7c478bd9Sstevel@tonic-gate schedctl_fork(kthread_t *pt, kthread_t *ct)
231*7c478bd9Sstevel@tonic-gate {
232*7c478bd9Sstevel@tonic-gate 	proc_t *pp = ttoproc(pt);
233*7c478bd9Sstevel@tonic-gate 	proc_t *cp = ttoproc(ct);
234*7c478bd9Sstevel@tonic-gate 	sc_page_ctl_t *pagep;
235*7c478bd9Sstevel@tonic-gate 
236*7c478bd9Sstevel@tonic-gate 	ASSERT(ct->t_schedctl == NULL);
237*7c478bd9Sstevel@tonic-gate 
238*7c478bd9Sstevel@tonic-gate 	/*
239*7c478bd9Sstevel@tonic-gate 	 * Do this only once, whether we are doing fork1() or forkall().
240*7c478bd9Sstevel@tonic-gate 	 * Don't do it at all if the child process is a child of vfork()
241*7c478bd9Sstevel@tonic-gate 	 * because a child of vfork() borrows the parent's address space.
242*7c478bd9Sstevel@tonic-gate 	 */
243*7c478bd9Sstevel@tonic-gate 	if (pt != curthread || (cp->p_flag & SVFORK))
244*7c478bd9Sstevel@tonic-gate 		return;
245*7c478bd9Sstevel@tonic-gate 
246*7c478bd9Sstevel@tonic-gate 	mutex_enter(&pp->p_sc_lock);
247*7c478bd9Sstevel@tonic-gate 	for (pagep = pp->p_pagep; pagep != NULL; pagep = pagep->spc_next)
248*7c478bd9Sstevel@tonic-gate 		(void) as_unmap(cp->p_as, pagep->spc_uaddr, PAGESIZE);
249*7c478bd9Sstevel@tonic-gate 	mutex_exit(&pp->p_sc_lock);
250*7c478bd9Sstevel@tonic-gate }
251*7c478bd9Sstevel@tonic-gate 
252*7c478bd9Sstevel@tonic-gate /*
253*7c478bd9Sstevel@tonic-gate  * Returns non-zero if the specified thread shouldn't be preempted at this time.
254*7c478bd9Sstevel@tonic-gate  * Called by ts_preempt, ts_tick, and ts_update.
255*7c478bd9Sstevel@tonic-gate  */
256*7c478bd9Sstevel@tonic-gate int
257*7c478bd9Sstevel@tonic-gate schedctl_get_nopreempt(kthread_t *t)
258*7c478bd9Sstevel@tonic-gate {
259*7c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(t));
260*7c478bd9Sstevel@tonic-gate 	return (t->t_schedctl->sc_preemptctl.sc_nopreempt);
261*7c478bd9Sstevel@tonic-gate }
262*7c478bd9Sstevel@tonic-gate 
263*7c478bd9Sstevel@tonic-gate 
264*7c478bd9Sstevel@tonic-gate /*
265*7c478bd9Sstevel@tonic-gate  * Sets the value of the nopreempt field for the specified thread.
266*7c478bd9Sstevel@tonic-gate  * Called by ts_preempt to clear the field on preemption.
267*7c478bd9Sstevel@tonic-gate  */
268*7c478bd9Sstevel@tonic-gate void
269*7c478bd9Sstevel@tonic-gate schedctl_set_nopreempt(kthread_t *t, short val)
270*7c478bd9Sstevel@tonic-gate {
271*7c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(t));
272*7c478bd9Sstevel@tonic-gate 	t->t_schedctl->sc_preemptctl.sc_nopreempt = val;
273*7c478bd9Sstevel@tonic-gate }
274*7c478bd9Sstevel@tonic-gate 
275*7c478bd9Sstevel@tonic-gate 
276*7c478bd9Sstevel@tonic-gate /*
277*7c478bd9Sstevel@tonic-gate  * Sets the value of the yield field for the specified thread.  Called by
278*7c478bd9Sstevel@tonic-gate  * ts_preempt and ts_tick to set the field, and ts_yield to clear it.
279*7c478bd9Sstevel@tonic-gate  * The kernel never looks at this field so we don't need a schedctl_get_yield
280*7c478bd9Sstevel@tonic-gate  * function.
281*7c478bd9Sstevel@tonic-gate  */
282*7c478bd9Sstevel@tonic-gate void
283*7c478bd9Sstevel@tonic-gate schedctl_set_yield(kthread_t *t, short val)
284*7c478bd9Sstevel@tonic-gate {
285*7c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(t));
286*7c478bd9Sstevel@tonic-gate 	t->t_schedctl->sc_preemptctl.sc_yield = val;
287*7c478bd9Sstevel@tonic-gate }
288*7c478bd9Sstevel@tonic-gate 
289*7c478bd9Sstevel@tonic-gate 
290*7c478bd9Sstevel@tonic-gate /*
291*7c478bd9Sstevel@tonic-gate  * Returns non-zero if the specified thread has requested that all
292*7c478bd9Sstevel@tonic-gate  * signals be blocked.  Called by signal-related code that tests
293*7c478bd9Sstevel@tonic-gate  * the signal mask of a thread that may not be the current thread
294*7c478bd9Sstevel@tonic-gate  * and where the process's p_lock cannot be acquired.
295*7c478bd9Sstevel@tonic-gate  */
296*7c478bd9Sstevel@tonic-gate int
297*7c478bd9Sstevel@tonic-gate schedctl_sigblock(kthread_t *t)
298*7c478bd9Sstevel@tonic-gate {
299*7c478bd9Sstevel@tonic-gate 	sc_shared_t *tdp = t->t_schedctl;
300*7c478bd9Sstevel@tonic-gate 
301*7c478bd9Sstevel@tonic-gate 	if (tdp)
302*7c478bd9Sstevel@tonic-gate 		return (tdp->sc_sigblock);
303*7c478bd9Sstevel@tonic-gate 	return (0);
304*7c478bd9Sstevel@tonic-gate }
305*7c478bd9Sstevel@tonic-gate 
306*7c478bd9Sstevel@tonic-gate 
307*7c478bd9Sstevel@tonic-gate /*
308*7c478bd9Sstevel@tonic-gate  * If the sc_sigblock field is set for the specified thread, set
309*7c478bd9Sstevel@tonic-gate  * its signal mask to block all maskable signals, then clear the
310*7c478bd9Sstevel@tonic-gate  * sc_sigblock field.  This finishes what user-level code requested
311*7c478bd9Sstevel@tonic-gate  * to be done when it set tdp->sc_shared->sc_sigblock non-zero.
312*7c478bd9Sstevel@tonic-gate  * Called by signal-related code that holds the process's p_lock.
313*7c478bd9Sstevel@tonic-gate  */
314*7c478bd9Sstevel@tonic-gate void
315*7c478bd9Sstevel@tonic-gate schedctl_finish_sigblock(kthread_t *t)
316*7c478bd9Sstevel@tonic-gate {
317*7c478bd9Sstevel@tonic-gate 	sc_shared_t *tdp = t->t_schedctl;
318*7c478bd9Sstevel@tonic-gate 
319*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
320*7c478bd9Sstevel@tonic-gate 
321*7c478bd9Sstevel@tonic-gate 	if (tdp && tdp->sc_sigblock) {
322*7c478bd9Sstevel@tonic-gate 		t->t_hold.__sigbits[0] = FILLSET0 & ~CANTMASK0;
323*7c478bd9Sstevel@tonic-gate 		t->t_hold.__sigbits[1] = FILLSET1 & ~CANTMASK1;
324*7c478bd9Sstevel@tonic-gate 		tdp->sc_sigblock = 0;
325*7c478bd9Sstevel@tonic-gate 	}
326*7c478bd9Sstevel@tonic-gate }
327*7c478bd9Sstevel@tonic-gate 
328*7c478bd9Sstevel@tonic-gate 
329*7c478bd9Sstevel@tonic-gate /*
330*7c478bd9Sstevel@tonic-gate  * Return non-zero if the current thread has declared that
331*7c478bd9Sstevel@tonic-gate  * it is calling into the kernel to park, else return zero.
332*7c478bd9Sstevel@tonic-gate  */
333*7c478bd9Sstevel@tonic-gate int
334*7c478bd9Sstevel@tonic-gate schedctl_is_park()
335*7c478bd9Sstevel@tonic-gate {
336*7c478bd9Sstevel@tonic-gate 	sc_shared_t *tdp = curthread->t_schedctl;
337*7c478bd9Sstevel@tonic-gate 
338*7c478bd9Sstevel@tonic-gate 	if (tdp)
339*7c478bd9Sstevel@tonic-gate 		return (tdp->sc_park);
340*7c478bd9Sstevel@tonic-gate 	/*
341*7c478bd9Sstevel@tonic-gate 	 * If we're here and there is no shared memory (how could
342*7c478bd9Sstevel@tonic-gate 	 * that happen?) then just assume we really are here to park.
343*7c478bd9Sstevel@tonic-gate 	 */
344*7c478bd9Sstevel@tonic-gate 	return (1);
345*7c478bd9Sstevel@tonic-gate }
346*7c478bd9Sstevel@tonic-gate 
347*7c478bd9Sstevel@tonic-gate 
348*7c478bd9Sstevel@tonic-gate /*
349*7c478bd9Sstevel@tonic-gate  * Clear the shared sc_park flag on return from parking in the kernel.
350*7c478bd9Sstevel@tonic-gate  */
351*7c478bd9Sstevel@tonic-gate void
352*7c478bd9Sstevel@tonic-gate schedctl_unpark()
353*7c478bd9Sstevel@tonic-gate {
354*7c478bd9Sstevel@tonic-gate 	sc_shared_t *tdp = curthread->t_schedctl;
355*7c478bd9Sstevel@tonic-gate 
356*7c478bd9Sstevel@tonic-gate 	if (tdp)
357*7c478bd9Sstevel@tonic-gate 		tdp->sc_park = 0;
358*7c478bd9Sstevel@tonic-gate }
359*7c478bd9Sstevel@tonic-gate 
360*7c478bd9Sstevel@tonic-gate 
361*7c478bd9Sstevel@tonic-gate /*
362*7c478bd9Sstevel@tonic-gate  * Page handling code.
363*7c478bd9Sstevel@tonic-gate  */
364*7c478bd9Sstevel@tonic-gate 
365*7c478bd9Sstevel@tonic-gate void
366*7c478bd9Sstevel@tonic-gate schedctl_init()
367*7c478bd9Sstevel@tonic-gate {
368*7c478bd9Sstevel@tonic-gate 	/*
369*7c478bd9Sstevel@tonic-gate 	 * Amount of page that can hold sc_shared_t structures.  If
370*7c478bd9Sstevel@tonic-gate 	 * sizeof (sc_shared_t) is a power of 2, this should just be
371*7c478bd9Sstevel@tonic-gate 	 * PAGESIZE.
372*7c478bd9Sstevel@tonic-gate 	 */
373*7c478bd9Sstevel@tonic-gate 	sc_pagesize = PAGESIZE - (PAGESIZE % sizeof (sc_shared_t));
374*7c478bd9Sstevel@tonic-gate 
375*7c478bd9Sstevel@tonic-gate 	/*
376*7c478bd9Sstevel@tonic-gate 	 * Allocation bitmap is one bit per struct on a page.
377*7c478bd9Sstevel@tonic-gate 	 */
378*7c478bd9Sstevel@tonic-gate 	sc_bitmap_len = sc_pagesize / sizeof (sc_shared_t);
379*7c478bd9Sstevel@tonic-gate 	sc_bitmap_words = howmany(sc_bitmap_len, BT_NBIPUL);
380*7c478bd9Sstevel@tonic-gate }
381*7c478bd9Sstevel@tonic-gate 
382*7c478bd9Sstevel@tonic-gate int
383*7c478bd9Sstevel@tonic-gate schedctl_shared_alloc(sc_shared_t **kaddrp, uintptr_t *uaddrp)
384*7c478bd9Sstevel@tonic-gate {
385*7c478bd9Sstevel@tonic-gate 	proc_t		*p = curproc;
386*7c478bd9Sstevel@tonic-gate 	sc_page_ctl_t	*pagep;
387*7c478bd9Sstevel@tonic-gate 	sc_shared_t	*ssp;
388*7c478bd9Sstevel@tonic-gate 	caddr_t		base;
389*7c478bd9Sstevel@tonic-gate 	index_t		index;
390*7c478bd9Sstevel@tonic-gate 	int		error;
391*7c478bd9Sstevel@tonic-gate 
392*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
393*7c478bd9Sstevel@tonic-gate 	mutex_enter(&p->p_sc_lock);
394*7c478bd9Sstevel@tonic-gate 
395*7c478bd9Sstevel@tonic-gate 	/*
396*7c478bd9Sstevel@tonic-gate 	 * Try to find space for the new data in existing pages
397*7c478bd9Sstevel@tonic-gate 	 * within the process's list of shared pages.
398*7c478bd9Sstevel@tonic-gate 	 */
399*7c478bd9Sstevel@tonic-gate 	for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next)
400*7c478bd9Sstevel@tonic-gate 		if (pagep->spc_space != 0)
401*7c478bd9Sstevel@tonic-gate 			break;
402*7c478bd9Sstevel@tonic-gate 
403*7c478bd9Sstevel@tonic-gate 	if (pagep != NULL)
404*7c478bd9Sstevel@tonic-gate 		base = pagep->spc_uaddr;
405*7c478bd9Sstevel@tonic-gate 	else {
406*7c478bd9Sstevel@tonic-gate 		struct anon_map *amp;
407*7c478bd9Sstevel@tonic-gate 		caddr_t kaddr;
408*7c478bd9Sstevel@tonic-gate 
409*7c478bd9Sstevel@tonic-gate 		/*
410*7c478bd9Sstevel@tonic-gate 		 * No room, need to allocate a new page.  Also set up
411*7c478bd9Sstevel@tonic-gate 		 * a mapping to the kernel address space for the new
412*7c478bd9Sstevel@tonic-gate 		 * page and lock it in memory.
413*7c478bd9Sstevel@tonic-gate 		 */
414*7c478bd9Sstevel@tonic-gate 		if ((error = schedctl_getpage(&amp, &kaddr)) != 0) {
415*7c478bd9Sstevel@tonic-gate 			mutex_exit(&p->p_sc_lock);
416*7c478bd9Sstevel@tonic-gate 			return (error);
417*7c478bd9Sstevel@tonic-gate 		}
418*7c478bd9Sstevel@tonic-gate 		if ((error = schedctl_map(amp, &base, kaddr)) != 0) {
419*7c478bd9Sstevel@tonic-gate 			schedctl_freepage(amp, kaddr);
420*7c478bd9Sstevel@tonic-gate 			mutex_exit(&p->p_sc_lock);
421*7c478bd9Sstevel@tonic-gate 			return (error);
422*7c478bd9Sstevel@tonic-gate 		}
423*7c478bd9Sstevel@tonic-gate 
424*7c478bd9Sstevel@tonic-gate 		/*
425*7c478bd9Sstevel@tonic-gate 		 * Allocate and initialize the page control structure.
426*7c478bd9Sstevel@tonic-gate 		 */
427*7c478bd9Sstevel@tonic-gate 		pagep = kmem_alloc(sizeof (sc_page_ctl_t), KM_SLEEP);
428*7c478bd9Sstevel@tonic-gate 		pagep->spc_amp = amp;
429*7c478bd9Sstevel@tonic-gate 		pagep->spc_base = (sc_shared_t *)kaddr;
430*7c478bd9Sstevel@tonic-gate 		pagep->spc_end = (sc_shared_t *)(kaddr + sc_pagesize);
431*7c478bd9Sstevel@tonic-gate 		pagep->spc_uaddr = base;
432*7c478bd9Sstevel@tonic-gate 
433*7c478bd9Sstevel@tonic-gate 		pagep->spc_map = kmem_zalloc(sizeof (ulong_t) * sc_bitmap_words,
434*7c478bd9Sstevel@tonic-gate 		    KM_SLEEP);
435*7c478bd9Sstevel@tonic-gate 		pagep->spc_space = sc_pagesize;
436*7c478bd9Sstevel@tonic-gate 
437*7c478bd9Sstevel@tonic-gate 		pagep->spc_next = p->p_pagep;
438*7c478bd9Sstevel@tonic-gate 		p->p_pagep = pagep;
439*7c478bd9Sstevel@tonic-gate 	}
440*7c478bd9Sstevel@tonic-gate 
441*7c478bd9Sstevel@tonic-gate 	/*
442*7c478bd9Sstevel@tonic-gate 	 * Got a page, now allocate space for the data.  There should
443*7c478bd9Sstevel@tonic-gate 	 * be space unless something's wrong.
444*7c478bd9Sstevel@tonic-gate 	 */
445*7c478bd9Sstevel@tonic-gate 	ASSERT(pagep != NULL && pagep->spc_space >= sizeof (sc_shared_t));
446*7c478bd9Sstevel@tonic-gate 	index = bt_availbit(pagep->spc_map, sc_bitmap_len);
447*7c478bd9Sstevel@tonic-gate 	ASSERT(index != -1);
448*7c478bd9Sstevel@tonic-gate 
449*7c478bd9Sstevel@tonic-gate 	/*
450*7c478bd9Sstevel@tonic-gate 	 * Get location with pointer arithmetic.  spc_base is of type
451*7c478bd9Sstevel@tonic-gate 	 * sc_shared_t *.  Mark as allocated.
452*7c478bd9Sstevel@tonic-gate 	 */
453*7c478bd9Sstevel@tonic-gate 	ssp = pagep->spc_base + index;
454*7c478bd9Sstevel@tonic-gate 	BT_SET(pagep->spc_map, index);
455*7c478bd9Sstevel@tonic-gate 	pagep->spc_space -= sizeof (sc_shared_t);
456*7c478bd9Sstevel@tonic-gate 
457*7c478bd9Sstevel@tonic-gate 	mutex_exit(&p->p_sc_lock);
458*7c478bd9Sstevel@tonic-gate 
459*7c478bd9Sstevel@tonic-gate 	/*
460*7c478bd9Sstevel@tonic-gate 	 * Return kernel and user addresses.
461*7c478bd9Sstevel@tonic-gate 	 */
462*7c478bd9Sstevel@tonic-gate 	*kaddrp = ssp;
463*7c478bd9Sstevel@tonic-gate 	*uaddrp = (uintptr_t)base + ((uintptr_t)ssp & PAGEOFFSET);
464*7c478bd9Sstevel@tonic-gate 	return (0);
465*7c478bd9Sstevel@tonic-gate }
466*7c478bd9Sstevel@tonic-gate 
467*7c478bd9Sstevel@tonic-gate 
468*7c478bd9Sstevel@tonic-gate /*
469*7c478bd9Sstevel@tonic-gate  * Find the page control structure corresponding to a kernel address.
470*7c478bd9Sstevel@tonic-gate  */
471*7c478bd9Sstevel@tonic-gate static sc_page_ctl_t *
472*7c478bd9Sstevel@tonic-gate schedctl_page_lookup(sc_shared_t *ssp)
473*7c478bd9Sstevel@tonic-gate {
474*7c478bd9Sstevel@tonic-gate 	proc_t *p = curproc;
475*7c478bd9Sstevel@tonic-gate 	sc_page_ctl_t *pagep;
476*7c478bd9Sstevel@tonic-gate 
477*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&p->p_sc_lock));
478*7c478bd9Sstevel@tonic-gate 	for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next) {
479*7c478bd9Sstevel@tonic-gate 		if (ssp >= pagep->spc_base && ssp < pagep->spc_end)
480*7c478bd9Sstevel@tonic-gate 			return (pagep);
481*7c478bd9Sstevel@tonic-gate 	}
482*7c478bd9Sstevel@tonic-gate 	return (NULL);		/* This "can't happen".  Should we panic? */
483*7c478bd9Sstevel@tonic-gate }
484*7c478bd9Sstevel@tonic-gate 
485*7c478bd9Sstevel@tonic-gate 
486*7c478bd9Sstevel@tonic-gate /*
487*7c478bd9Sstevel@tonic-gate  * This function is called when a page needs to be mapped into a
488*7c478bd9Sstevel@tonic-gate  * process's address space.  Allocate the user address space and
489*7c478bd9Sstevel@tonic-gate  * set up the mapping to the page.  Assumes the page has already
490*7c478bd9Sstevel@tonic-gate  * been allocated and locked in memory via schedctl_getpage.
491*7c478bd9Sstevel@tonic-gate  */
492*7c478bd9Sstevel@tonic-gate static int
493*7c478bd9Sstevel@tonic-gate schedctl_map(struct anon_map *amp, caddr_t *uaddrp, caddr_t kaddr)
494*7c478bd9Sstevel@tonic-gate {
495*7c478bd9Sstevel@tonic-gate 	caddr_t addr;
496*7c478bd9Sstevel@tonic-gate 	struct as *as = curproc->p_as;
497*7c478bd9Sstevel@tonic-gate 	struct segvn_crargs vn_a;
498*7c478bd9Sstevel@tonic-gate 	int error;
499*7c478bd9Sstevel@tonic-gate 
500*7c478bd9Sstevel@tonic-gate 	as_rangelock(as);
501*7c478bd9Sstevel@tonic-gate 	/* pass address of kernel mapping as offset to avoid VAC conflicts */
502*7c478bd9Sstevel@tonic-gate 	map_addr(&addr, PAGESIZE, (offset_t)(uintptr_t)kaddr, 1, 0);
503*7c478bd9Sstevel@tonic-gate 	if (addr == NULL) {
504*7c478bd9Sstevel@tonic-gate 		as_rangeunlock(as);
505*7c478bd9Sstevel@tonic-gate 		return (ENOMEM);
506*7c478bd9Sstevel@tonic-gate 	}
507*7c478bd9Sstevel@tonic-gate 
508*7c478bd9Sstevel@tonic-gate 	/*
509*7c478bd9Sstevel@tonic-gate 	 * Use segvn to set up the mapping to the page.
510*7c478bd9Sstevel@tonic-gate 	 */
511*7c478bd9Sstevel@tonic-gate 	vn_a.vp = NULL;
512*7c478bd9Sstevel@tonic-gate 	vn_a.offset = 0;
513*7c478bd9Sstevel@tonic-gate 	vn_a.cred = NULL;
514*7c478bd9Sstevel@tonic-gate 	vn_a.type = MAP_SHARED;
515*7c478bd9Sstevel@tonic-gate 	vn_a.prot = vn_a.maxprot = PROT_ALL;
516*7c478bd9Sstevel@tonic-gate 	vn_a.flags = 0;
517*7c478bd9Sstevel@tonic-gate 	vn_a.amp = amp;
518*7c478bd9Sstevel@tonic-gate 	vn_a.szc = 0;
519*7c478bd9Sstevel@tonic-gate 	vn_a.lgrp_mem_policy_flags = 0;
520*7c478bd9Sstevel@tonic-gate 	error = as_map(as, addr, PAGESIZE, segvn_create, &vn_a);
521*7c478bd9Sstevel@tonic-gate 	as_rangeunlock(as);
522*7c478bd9Sstevel@tonic-gate 
523*7c478bd9Sstevel@tonic-gate 	if (error)
524*7c478bd9Sstevel@tonic-gate 		return (error);
525*7c478bd9Sstevel@tonic-gate 
526*7c478bd9Sstevel@tonic-gate 	*uaddrp = addr;
527*7c478bd9Sstevel@tonic-gate 	return (0);
528*7c478bd9Sstevel@tonic-gate }
529*7c478bd9Sstevel@tonic-gate 
530*7c478bd9Sstevel@tonic-gate 
531*7c478bd9Sstevel@tonic-gate /*
532*7c478bd9Sstevel@tonic-gate  * Allocate a new page from anonymous memory.  Also, create a kernel
533*7c478bd9Sstevel@tonic-gate  * mapping to the page and lock the page in memory.
534*7c478bd9Sstevel@tonic-gate  */
535*7c478bd9Sstevel@tonic-gate static int
536*7c478bd9Sstevel@tonic-gate schedctl_getpage(struct anon_map **newamp, caddr_t *newaddr)
537*7c478bd9Sstevel@tonic-gate {
538*7c478bd9Sstevel@tonic-gate 	struct anon_map *amp;
539*7c478bd9Sstevel@tonic-gate 	caddr_t kaddr;
540*7c478bd9Sstevel@tonic-gate 
541*7c478bd9Sstevel@tonic-gate 	/*
542*7c478bd9Sstevel@tonic-gate 	 * Set up anonymous memory struct.  No swap reservation is
543*7c478bd9Sstevel@tonic-gate 	 * needed since the page will be locked into memory.
544*7c478bd9Sstevel@tonic-gate 	 */
545*7c478bd9Sstevel@tonic-gate 	amp = anonmap_alloc(PAGESIZE, PAGESIZE);
546*7c478bd9Sstevel@tonic-gate 
547*7c478bd9Sstevel@tonic-gate 	/*
548*7c478bd9Sstevel@tonic-gate 	 * Allocate the page.
549*7c478bd9Sstevel@tonic-gate 	 */
550*7c478bd9Sstevel@tonic-gate 	kaddr = segkp_get_withanonmap(segkp, PAGESIZE, KPD_LOCKED | KPD_ZERO,
551*7c478bd9Sstevel@tonic-gate 	    amp);
552*7c478bd9Sstevel@tonic-gate 	if (kaddr == NULL) {
553*7c478bd9Sstevel@tonic-gate 		amp->refcnt--;
554*7c478bd9Sstevel@tonic-gate 		anonmap_free(amp);
555*7c478bd9Sstevel@tonic-gate 		return (ENOMEM);
556*7c478bd9Sstevel@tonic-gate 	}
557*7c478bd9Sstevel@tonic-gate 
558*7c478bd9Sstevel@tonic-gate 	/*
559*7c478bd9Sstevel@tonic-gate 	 * The page is left SE_SHARED locked so that it won't be
560*7c478bd9Sstevel@tonic-gate 	 * paged out or relocated (KPD_LOCKED above).
561*7c478bd9Sstevel@tonic-gate 	 */
562*7c478bd9Sstevel@tonic-gate 
563*7c478bd9Sstevel@tonic-gate 	*newamp = amp;
564*7c478bd9Sstevel@tonic-gate 	*newaddr = kaddr;
565*7c478bd9Sstevel@tonic-gate 	return (0);
566*7c478bd9Sstevel@tonic-gate }
567*7c478bd9Sstevel@tonic-gate 
568*7c478bd9Sstevel@tonic-gate 
569*7c478bd9Sstevel@tonic-gate /*
570*7c478bd9Sstevel@tonic-gate  * Take the necessary steps to allow a page to be released.
571*7c478bd9Sstevel@tonic-gate  * This is called when the process is doing exit() or exec().
572*7c478bd9Sstevel@tonic-gate  * There should be no accesses to the page after this.
573*7c478bd9Sstevel@tonic-gate  * The kernel mapping of the page is released and the page is unlocked.
574*7c478bd9Sstevel@tonic-gate  */
575*7c478bd9Sstevel@tonic-gate static void
576*7c478bd9Sstevel@tonic-gate schedctl_freepage(struct anon_map *amp, caddr_t kaddr)
577*7c478bd9Sstevel@tonic-gate {
578*7c478bd9Sstevel@tonic-gate 	/*
579*7c478bd9Sstevel@tonic-gate 	 * Release the lock on the page and remove the kernel mapping.
580*7c478bd9Sstevel@tonic-gate 	 */
581*7c478bd9Sstevel@tonic-gate 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
582*7c478bd9Sstevel@tonic-gate 	segkp_release(segkp, kaddr);
583*7c478bd9Sstevel@tonic-gate 
584*7c478bd9Sstevel@tonic-gate 	/*
585*7c478bd9Sstevel@tonic-gate 	 * Decrement the refcnt so the anon_map structure will be freed.
586*7c478bd9Sstevel@tonic-gate 	 */
587*7c478bd9Sstevel@tonic-gate 	if (--amp->refcnt == 0) {
588*7c478bd9Sstevel@tonic-gate 		/*
589*7c478bd9Sstevel@tonic-gate 		 * The current process no longer has the page mapped, so
590*7c478bd9Sstevel@tonic-gate 		 * we have to free everything rather than letting as_free
591*7c478bd9Sstevel@tonic-gate 		 * do the work.
592*7c478bd9Sstevel@tonic-gate 		 */
593*7c478bd9Sstevel@tonic-gate 		anon_free(amp->ahp, 0, PAGESIZE);
594*7c478bd9Sstevel@tonic-gate 		ANON_LOCK_EXIT(&amp->a_rwlock);
595*7c478bd9Sstevel@tonic-gate 		anonmap_free(amp);
596*7c478bd9Sstevel@tonic-gate 	} else {
597*7c478bd9Sstevel@tonic-gate 		ANON_LOCK_EXIT(&amp->a_rwlock);
598*7c478bd9Sstevel@tonic-gate 	}
599*7c478bd9Sstevel@tonic-gate }
600