xref: /illumos-gate/usr/src/uts/common/os/schedctl.c (revision 5a469116729183a46e77dc0620955bbde58d93f7)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2021 Joyent, Inc.
 * Copyright 2021 Oxide Computer Company
 */
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/schedctl.h>
32 #include <sys/proc.h>
33 #include <sys/thread.h>
34 #include <sys/class.h>
35 #include <sys/cred.h>
36 #include <sys/kmem.h>
37 #include <sys/cmn_err.h>
38 #include <sys/stack.h>
39 #include <sys/debug.h>
40 #include <sys/cpuvar.h>
41 #include <sys/sobject.h>
42 #include <sys/door.h>
43 #include <sys/modctl.h>
44 #include <sys/syscall.h>
45 #include <sys/sysmacros.h>
46 #include <sys/vmsystm.h>
47 #include <sys/mman.h>
48 #include <sys/vnode.h>
49 #include <sys/swap.h>
50 #include <sys/lwp.h>
51 #include <sys/bitmap.h>
52 #include <sys/atomic.h>
53 #include <sys/fcntl.h>
54 #include <vm/seg_kp.h>
55 #include <vm/seg_vn.h>
56 #include <vm/as.h>
57 #include <fs/fs_subr.h>
58 
59 /*
60  * Page handling structures.  This is set up as a list of per-page
61  * control structures (sc_page_ctl), with p->p_pagep pointing to
62  * the first.  The per-page structures point to the actual pages
63  * and contain pointers to the user address for each mapped page.
64  *
65  * All data is protected by p->p_sc_lock.  Since this lock is
66  * held while waiting for memory, schedctl_shared_alloc() should
67  * not be called while holding p_lock.
68  */
69 
70 typedef struct sc_page_ctl {
71 	struct sc_page_ctl *spc_next;
72 	sc_shared_t	*spc_base;	/* base of kernel page */
73 	sc_shared_t	*spc_end;	/* end of usable space */
74 	ulong_t		*spc_map;	/* bitmap of allocated space on page */
75 	size_t		spc_space;	/* amount of space on page */
76 	caddr_t		spc_uaddr;	/* user-level address of the page */
77 	struct anon_map	*spc_amp;	/* anonymous memory structure */
78 } sc_page_ctl_t;
79 
/*
 * Page-layout values; all three are assigned by schedctl_init().
 */
static size_t	sc_pagesize;		/* usable bytes on a shared page */
static size_t	sc_bitmap_len;		/* bits in the allocation bitmap */
static size_t	sc_bitmap_words;	/* words in the allocation bitmap */
83 
84 /* Context ops */
85 static void schedctl_save(void *);
86 static void schedctl_restore(void *);
87 static void schedctl_fork(void *, void *);
88 
89 /* Functions for handling shared pages */
90 static int	schedctl_shared_alloc(sc_shared_t **, uintptr_t *);
91 static sc_page_ctl_t *schedctl_page_lookup(sc_shared_t *);
92 static int	schedctl_map(struct anon_map *, caddr_t *, caddr_t);
93 static int	schedctl_getpage(struct anon_map **, caddr_t *);
94 static void	schedctl_freepage(struct anon_map *, caddr_t);
95 
96 static const struct ctxop_template schedctl_ctxop_tpl = {
97 	.ct_rev		= CTXOP_TPL_REV,
98 	.ct_save	= schedctl_save,
99 	.ct_restore	= schedctl_restore,
100 	.ct_fork	= schedctl_fork,
101 };
102 
103 /*
104  * System call interface to scheduler activations.
105  * This always operates on the current lwp.
106  */
107 caddr_t
schedctl(void)108 schedctl(void)
109 {
110 	kthread_t	*t = curthread;
111 	sc_shared_t	*ssp;
112 	uintptr_t	uaddr;
113 	int		error;
114 
115 	if (t->t_schedctl == NULL) {
116 		/*
117 		 * Allocate and initialize the shared structure.
118 		 */
119 		if ((error = schedctl_shared_alloc(&ssp, &uaddr)) != 0)
120 			return ((caddr_t)(uintptr_t)set_errno(error));
121 		bzero(ssp, sizeof (*ssp));
122 
123 		ctxop_install(t, &schedctl_ctxop_tpl, ssp);
124 
125 		thread_lock(t);	/* protect against ts_tick and ts_update */
126 		t->t_schedctl = ssp;
127 		t->t_sc_uaddr = uaddr;
128 		ssp->sc_cid = t->t_cid;
129 		ssp->sc_cpri = t->t_cpri;
130 		ssp->sc_priority = DISP_PRIO(t);
131 		thread_unlock(t);
132 	}
133 
134 	return ((caddr_t)t->t_sc_uaddr);
135 }
136 
137 
138 /*
139  * Clean up scheduler activations state associated with an exiting
140  * (or execing) lwp.  t is always the current thread.
141  */
142 void
schedctl_lwp_cleanup(kthread_t * t)143 schedctl_lwp_cleanup(kthread_t *t)
144 {
145 	sc_shared_t	*ssp = t->t_schedctl;
146 	proc_t		*p = ttoproc(t);
147 	sc_page_ctl_t	*pagep;
148 	index_t		index;
149 
150 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
151 
152 	thread_lock(t);		/* protect against ts_tick and ts_update */
153 	t->t_schedctl = NULL;
154 	t->t_sc_uaddr = 0;
155 	thread_unlock(t);
156 
157 	/*
158 	 * Remove the context op to avoid the final call to
159 	 * schedctl_save when switching away from this lwp.
160 	 */
161 	(void) ctxop_remove(t, &schedctl_ctxop_tpl, ssp);
162 
163 	/*
164 	 * Do not unmap the shared page until the process exits.
165 	 * User-level library code relies on this for adaptive mutex locking.
166 	 */
167 	mutex_enter(&p->p_sc_lock);
168 	ssp->sc_state = SC_FREE;
169 	pagep = schedctl_page_lookup(ssp);
170 	index = (index_t)(ssp - pagep->spc_base);
171 	BT_CLEAR(pagep->spc_map, index);
172 	pagep->spc_space += sizeof (sc_shared_t);
173 	mutex_exit(&p->p_sc_lock);
174 }
175 
176 
177 /*
178  * Cleanup the list of schedctl shared pages for the process.
179  * Called from exec() and exit() system calls.
180  */
181 void
schedctl_proc_cleanup(void)182 schedctl_proc_cleanup(void)
183 {
184 	proc_t *p = curproc;
185 	sc_page_ctl_t *pagep;
186 	sc_page_ctl_t *next;
187 
188 	ASSERT(p->p_lwpcnt == 1);	/* we are single-threaded now */
189 	ASSERT(curthread->t_schedctl == NULL);
190 
191 	/*
192 	 * Since we are single-threaded, we don't have to hold p->p_sc_lock.
193 	 */
194 	pagep = p->p_pagep;
195 	p->p_pagep = NULL;
196 	while (pagep != NULL) {
197 		ASSERT(pagep->spc_space == sc_pagesize);
198 		next = pagep->spc_next;
199 		/*
200 		 * Unmap the user space and free the mapping structure.
201 		 */
202 		(void) as_unmap(p->p_as, pagep->spc_uaddr, PAGESIZE);
203 		schedctl_freepage(pagep->spc_amp, (caddr_t)(pagep->spc_base));
204 		kmem_free(pagep->spc_map, sizeof (ulong_t) * sc_bitmap_words);
205 		kmem_free(pagep, sizeof (sc_page_ctl_t));
206 		pagep = next;
207 	}
208 }
209 
210 
211 /*
212  * Called by resume just before switching away from the current thread.
213  * Save new thread state.
214  */
215 static void
schedctl_save(void * arg)216 schedctl_save(void *arg)
217 {
218 	sc_shared_t *ssp = arg;
219 
220 	ssp->sc_state = curthread->t_state;
221 }
222 
223 
224 /*
225  * Called by resume after switching to the current thread.
226  * Save new thread state and CPU.
227  */
228 static void
schedctl_restore(void * arg)229 schedctl_restore(void *arg)
230 {
231 	sc_shared_t *ssp = arg;
232 
233 	ssp->sc_state = SC_ONPROC;
234 	ssp->sc_cpu = CPU->cpu_id;
235 }
236 
237 
238 /*
239  * On fork, remove inherited mappings from the child's address space.
240  * The child's threads must call schedctl() to get new shared mappings.
241  */
242 static void
schedctl_fork(void * parent,void * child)243 schedctl_fork(void *parent, void *child)
244 {
245 	kthread_t *pt = parent, *ct = child;
246 	proc_t *pp = ttoproc(pt);
247 	proc_t *cp = ttoproc(ct);
248 	sc_page_ctl_t *pagep;
249 
250 	ASSERT(ct->t_schedctl == NULL);
251 
252 	/*
253 	 * Do this only once, whether we are doing fork1() or forkall().
254 	 * Don't do it at all if the child process is a child of vfork()
255 	 * because a child of vfork() borrows the parent's address space.
256 	 */
257 	if (pt != curthread || (cp->p_flag & SVFORK))
258 		return;
259 
260 	mutex_enter(&pp->p_sc_lock);
261 	for (pagep = pp->p_pagep; pagep != NULL; pagep = pagep->spc_next)
262 		(void) as_unmap(cp->p_as, pagep->spc_uaddr, PAGESIZE);
263 	mutex_exit(&pp->p_sc_lock);
264 }
265 
266 
267 /*
268  * Returns non-zero if the specified thread shouldn't be preempted at this time.
269  * Called by ts_preempt(), ts_tick(), and ts_update().
270  */
271 int
schedctl_get_nopreempt(kthread_t * t)272 schedctl_get_nopreempt(kthread_t *t)
273 {
274 	ASSERT(THREAD_LOCK_HELD(t));
275 	return (t->t_schedctl->sc_preemptctl.sc_nopreempt);
276 }
277 
278 
279 /*
280  * Sets the value of the nopreempt field for the specified thread.
281  * Called by ts_preempt() to clear the field on preemption.
282  */
283 void
schedctl_set_nopreempt(kthread_t * t,short val)284 schedctl_set_nopreempt(kthread_t *t, short val)
285 {
286 	ASSERT(THREAD_LOCK_HELD(t));
287 	t->t_schedctl->sc_preemptctl.sc_nopreempt = val;
288 }
289 
290 
291 /*
292  * Sets the value of the yield field for the specified thread.
293  * Called by ts_preempt() and ts_tick() to set the field, and
294  * ts_yield() to clear it.
295  * The kernel never looks at this field so we don't need a
296  * schedctl_get_yield() function.
297  */
298 void
schedctl_set_yield(kthread_t * t,short val)299 schedctl_set_yield(kthread_t *t, short val)
300 {
301 	ASSERT(THREAD_LOCK_HELD(t));
302 	t->t_schedctl->sc_preemptctl.sc_yield = val;
303 }
304 
305 
306 /*
307  * Sets the values of the cid and priority fields for the specified thread.
308  * Called from thread_change_pri(), thread_change_epri(), THREAD_CHANGE_PRI().
309  * Called following calls to CL_FORKRET() and CL_ENTERCLASS().
310  */
311 void
schedctl_set_cidpri(kthread_t * t)312 schedctl_set_cidpri(kthread_t *t)
313 {
314 	sc_shared_t *tdp = t->t_schedctl;
315 
316 	if (tdp != NULL) {
317 		tdp->sc_cid = t->t_cid;
318 		tdp->sc_cpri = t->t_cpri;
319 		tdp->sc_priority = DISP_PRIO(t);
320 	}
321 }
322 
323 
324 /*
325  * Returns non-zero if the specified thread has requested that all
326  * signals be blocked.  Called by signal-related code that tests
327  * the signal mask of a thread that may not be the current thread
328  * and where the process's p_lock cannot be acquired.
329  */
330 int
schedctl_sigblock(kthread_t * t)331 schedctl_sigblock(kthread_t *t)
332 {
333 	sc_shared_t *tdp = t->t_schedctl;
334 
335 	if (tdp != NULL)
336 		return (tdp->sc_sigblock);
337 	return (0);
338 }
339 
340 
341 /*
342  * If the sc_sigblock field is set for the specified thread, set its signal
343  * mask to block all maskable signals, then clear the sc_sigblock field.  This
344  * accomplishes what user-level code requested to be done when it set
345  * tdp->sc_shared->sc_sigblock non-zero.
346  *
347  * This is generally called by signal-related code in the current thread.  In
348  * order to call against a thread other than curthread, p_lock for the
349  * containing process must be held.  Even then, the caller is not protected
350  * from races with the thread in question updating its own fields.  It is the
351  * responsibility of the caller to perform additional synchronization.
352  *
353  */
354 void
schedctl_finish_sigblock(kthread_t * t)355 schedctl_finish_sigblock(kthread_t *t)
356 {
357 	sc_shared_t *tdp = t->t_schedctl;
358 
359 	ASSERT(t == curthread || MUTEX_HELD(&ttoproc(t)->p_lock));
360 
361 	if (tdp != NULL && tdp->sc_sigblock) {
362 		t->t_hold.__sigbits[0] = FILLSET0 & ~CANTMASK0;
363 		t->t_hold.__sigbits[1] = FILLSET1 & ~CANTMASK1;
364 		t->t_hold.__sigbits[2] = FILLSET2 & ~CANTMASK2;
365 		tdp->sc_sigblock = 0;
366 	}
367 }
368 
369 
370 /*
371  * Return non-zero if the current thread has declared that it has
372  * a cancellation pending and that cancellation is not disabled.
373  * If SIGCANCEL is blocked, we must be going over the wire in an
374  * NFS transaction (sigintr() was called); return zero in this case.
375  */
376 int
schedctl_cancel_pending(void)377 schedctl_cancel_pending(void)
378 {
379 	sc_shared_t *tdp = curthread->t_schedctl;
380 
381 	if (tdp != NULL &&
382 	    (tdp->sc_flgs & SC_CANCEL_FLG) &&
383 	    !tdp->sc_sigblock &&
384 	    !sigismember(&curthread->t_hold, SIGCANCEL))
385 		return (1);
386 	return (0);
387 }
388 
389 
390 /*
391  * Inform libc that the kernel returned EINTR from some system call
392  * due to there being a cancellation pending (SC_CANCEL_FLG set or
393  * we received an SI_LWP SIGCANCEL while in a system call), rather
394  * than because of some other signal.  User-level code can try to
395  * recover from receiving other signals, but it can't recover from
396  * being cancelled.
397  */
398 void
schedctl_cancel_eintr(void)399 schedctl_cancel_eintr(void)
400 {
401 	sc_shared_t *tdp = curthread->t_schedctl;
402 
403 	if (tdp != NULL)
404 		tdp->sc_flgs |= SC_EINTR_FLG;
405 }
406 
407 
408 /*
409  * Return non-zero if the current thread has declared that
410  * it is calling into the kernel to park, else return zero.
411  */
412 int
schedctl_is_park(void)413 schedctl_is_park(void)
414 {
415 	sc_shared_t *tdp = curthread->t_schedctl;
416 
417 	if (tdp != NULL)
418 		return ((tdp->sc_flgs & SC_PARK_FLG) != 0);
419 	/*
420 	 * If we're here and there is no shared memory (how could
421 	 * that happen?) then just assume we really are here to park.
422 	 */
423 	return (1);
424 }
425 
426 
427 /*
428  * Declare thread is parking.
429  *
430  * libc will set "sc_flgs |= SC_PARK_FLG" before calling lwpsys_park(0, tid)
431  * in order to declare that the thread is calling into the kernel to park.
432  *
433  * This interface exists ONLY to support older versions of libthread which
434  * are not aware of the SC_PARK_FLG flag.
435  *
436  * Older versions of libthread which are not aware of the SC_PARK_FLG flag
437  * need to be modified or emulated to call lwpsys_park(4, ...) instead of
438  * lwpsys_park(0, ...).  This will invoke schedctl_set_park() before
439  * lwp_park() to declare that the thread is parking.
440  */
441 void
schedctl_set_park(void)442 schedctl_set_park(void)
443 {
444 	sc_shared_t *tdp = curthread->t_schedctl;
445 	if (tdp != NULL)
446 		tdp->sc_flgs |= SC_PARK_FLG;
447 }
448 
449 
450 /*
451  * Clear the parking flag on return from parking in the kernel.
452  */
453 void
schedctl_unpark(void)454 schedctl_unpark(void)
455 {
456 	sc_shared_t *tdp = curthread->t_schedctl;
457 
458 	if (tdp != NULL)
459 		tdp->sc_flgs &= ~SC_PARK_FLG;
460 }
461 
462 
/*
 * Page handling code.
 */
466 
467 void
schedctl_init(void)468 schedctl_init(void)
469 {
470 	/*
471 	 * Amount of page that can hold sc_shared_t structures.  If
472 	 * sizeof (sc_shared_t) is a power of 2, this should just be
473 	 * PAGESIZE.
474 	 */
475 	sc_pagesize = PAGESIZE - (PAGESIZE % sizeof (sc_shared_t));
476 
477 	/*
478 	 * Allocation bitmap is one bit per struct on a page.
479 	 */
480 	sc_bitmap_len = sc_pagesize / sizeof (sc_shared_t);
481 	sc_bitmap_words = howmany(sc_bitmap_len, BT_NBIPUL);
482 }
483 
484 
485 static int
schedctl_shared_alloc(sc_shared_t ** kaddrp,uintptr_t * uaddrp)486 schedctl_shared_alloc(sc_shared_t **kaddrp, uintptr_t *uaddrp)
487 {
488 	proc_t		*p = curproc;
489 	sc_page_ctl_t	*pagep;
490 	sc_shared_t	*ssp;
491 	caddr_t		base;
492 	index_t		index;
493 	int		error;
494 
495 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
496 	mutex_enter(&p->p_sc_lock);
497 
498 	/*
499 	 * Try to find space for the new data in existing pages
500 	 * within the process's list of shared pages.
501 	 */
502 	for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next)
503 		if (pagep->spc_space != 0)
504 			break;
505 
506 	if (pagep != NULL)
507 		base = pagep->spc_uaddr;
508 	else {
509 		struct anon_map *amp;
510 		caddr_t kaddr;
511 
512 		/*
513 		 * No room, need to allocate a new page.  Also set up
514 		 * a mapping to the kernel address space for the new
515 		 * page and lock it in memory.
516 		 */
517 		if ((error = schedctl_getpage(&amp, &kaddr)) != 0) {
518 			mutex_exit(&p->p_sc_lock);
519 			return (error);
520 		}
521 		if ((error = schedctl_map(amp, &base, kaddr)) != 0) {
522 			schedctl_freepage(amp, kaddr);
523 			mutex_exit(&p->p_sc_lock);
524 			return (error);
525 		}
526 
527 		/*
528 		 * Allocate and initialize the page control structure.
529 		 */
530 		pagep = kmem_alloc(sizeof (sc_page_ctl_t), KM_SLEEP);
531 		pagep->spc_amp = amp;
532 		pagep->spc_base = (sc_shared_t *)kaddr;
533 		pagep->spc_end = (sc_shared_t *)(kaddr + sc_pagesize);
534 		pagep->spc_uaddr = base;
535 
536 		pagep->spc_map = kmem_zalloc(sizeof (ulong_t) * sc_bitmap_words,
537 		    KM_SLEEP);
538 		pagep->spc_space = sc_pagesize;
539 
540 		pagep->spc_next = p->p_pagep;
541 		p->p_pagep = pagep;
542 	}
543 
544 	/*
545 	 * Got a page, now allocate space for the data.  There should
546 	 * be space unless something's wrong.
547 	 */
548 	ASSERT(pagep != NULL && pagep->spc_space >= sizeof (sc_shared_t));
549 	index = bt_availbit(pagep->spc_map, sc_bitmap_len);
550 	ASSERT(index != -1);
551 
552 	/*
553 	 * Get location with pointer arithmetic.  spc_base is of type
554 	 * sc_shared_t *.  Mark as allocated.
555 	 */
556 	ssp = pagep->spc_base + index;
557 	BT_SET(pagep->spc_map, index);
558 	pagep->spc_space -= sizeof (sc_shared_t);
559 
560 	mutex_exit(&p->p_sc_lock);
561 
562 	/*
563 	 * Return kernel and user addresses.
564 	 */
565 	*kaddrp = ssp;
566 	*uaddrp = (uintptr_t)base + ((uintptr_t)ssp & PAGEOFFSET);
567 	return (0);
568 }
569 
570 
571 /*
572  * Find the page control structure corresponding to a kernel address.
573  */
574 static sc_page_ctl_t *
schedctl_page_lookup(sc_shared_t * ssp)575 schedctl_page_lookup(sc_shared_t *ssp)
576 {
577 	proc_t *p = curproc;
578 	sc_page_ctl_t *pagep;
579 
580 	ASSERT(MUTEX_HELD(&p->p_sc_lock));
581 	for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next) {
582 		if (ssp >= pagep->spc_base && ssp < pagep->spc_end)
583 			return (pagep);
584 	}
585 	return (NULL);		/* This "can't happen".  Should we panic? */
586 }
587 
588 
589 /*
590  * This function is called when a page needs to be mapped into a
591  * process's address space.  Allocate the user address space and
592  * set up the mapping to the page.  Assumes the page has already
593  * been allocated and locked in memory via schedctl_getpage.
594  */
595 static int
schedctl_map(struct anon_map * amp,caddr_t * uaddrp,caddr_t kaddr)596 schedctl_map(struct anon_map *amp, caddr_t *uaddrp, caddr_t kaddr)
597 {
598 	caddr_t addr = NULL;
599 	struct as *as = curproc->p_as;
600 	struct segvn_crargs vn_a;
601 	int error;
602 
603 	as_rangelock(as);
604 	/* pass address of kernel mapping as offset to avoid VAC conflicts */
605 	map_addr(&addr, PAGESIZE, (offset_t)(uintptr_t)kaddr, 1, 0);
606 	if (addr == NULL) {
607 		as_rangeunlock(as);
608 		return (ENOMEM);
609 	}
610 
611 	/*
612 	 * Use segvn to set up the mapping to the page.
613 	 */
614 	vn_a.vp = NULL;
615 	vn_a.offset = 0;
616 	vn_a.cred = NULL;
617 	vn_a.type = MAP_SHARED;
618 	vn_a.prot = vn_a.maxprot = PROT_ALL;
619 	vn_a.flags = 0;
620 	vn_a.amp = amp;
621 	vn_a.szc = 0;
622 	vn_a.lgrp_mem_policy_flags = 0;
623 	error = as_map(as, addr, PAGESIZE, segvn_create, &vn_a);
624 	as_rangeunlock(as);
625 
626 	if (error)
627 		return (error);
628 
629 	*uaddrp = addr;
630 	return (0);
631 }
632 
633 
634 /*
635  * Allocate a new page from anonymous memory.  Also, create a kernel
636  * mapping to the page and lock the page in memory.
637  */
638 static int
schedctl_getpage(struct anon_map ** newamp,caddr_t * newaddr)639 schedctl_getpage(struct anon_map **newamp, caddr_t *newaddr)
640 {
641 	struct anon_map *amp;
642 	caddr_t kaddr;
643 
644 	/*
645 	 * Set up anonymous memory struct.  No swap reservation is
646 	 * needed since the page will be locked into memory.
647 	 */
648 	amp = anonmap_alloc(PAGESIZE, 0, ANON_SLEEP);
649 
650 	/*
651 	 * Allocate the page.
652 	 */
653 	kaddr = segkp_get_withanonmap(segkp, PAGESIZE,
654 	    KPD_NO_ANON | KPD_LOCKED | KPD_ZERO, amp);
655 	if (kaddr == NULL) {
656 		amp->refcnt--;
657 		anonmap_free(amp);
658 		return (ENOMEM);
659 	}
660 
661 	/*
662 	 * The page is left SE_SHARED locked so that it won't be
663 	 * paged out or relocated (KPD_LOCKED above).
664 	 */
665 
666 	*newamp = amp;
667 	*newaddr = kaddr;
668 	return (0);
669 }
670 
671 
672 /*
673  * Take the necessary steps to allow a page to be released.
674  * This is called when the process is doing exit() or exec().
675  * There should be no accesses to the page after this.
676  * The kernel mapping of the page is released and the page is unlocked.
677  */
678 static void
schedctl_freepage(struct anon_map * amp,caddr_t kaddr)679 schedctl_freepage(struct anon_map *amp, caddr_t kaddr)
680 {
681 	/*
682 	 * Release the lock on the page and remove the kernel mapping.
683 	 */
684 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
685 	segkp_release(segkp, kaddr);
686 
687 	/*
688 	 * Decrement the refcnt so the anon_map structure will be freed.
689 	 */
690 	if (--amp->refcnt == 0) {
691 		/*
692 		 * The current process no longer has the page mapped, so
693 		 * we have to free everything rather than letting as_free
694 		 * do the work.
695 		 */
696 		anonmap_purge(amp);
697 		anon_free(amp->ahp, 0, PAGESIZE);
698 		ANON_LOCK_EXIT(&amp->a_rwlock);
699 		anonmap_free(amp);
700 	} else {
701 		ANON_LOCK_EXIT(&amp->a_rwlock);
702 	}
703 }
704