17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 50209230bSgjelinek * Common Development and Distribution License (the "License"). 60209230bSgjelinek * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 21a574db85Sraf 227c478bd9Sstevel@tonic-gate /* 23*bdf0047cSRoger A. Faulkner * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 247c478bd9Sstevel@tonic-gate * Use is subject to license terms. 257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 277c478bd9Sstevel@tonic-gate #include <sys/types.h> 287c478bd9Sstevel@tonic-gate #include <sys/systm.h> 297c478bd9Sstevel@tonic-gate #include <sys/schedctl.h> 307c478bd9Sstevel@tonic-gate #include <sys/proc.h> 317c478bd9Sstevel@tonic-gate #include <sys/thread.h> 327c478bd9Sstevel@tonic-gate #include <sys/class.h> 337c478bd9Sstevel@tonic-gate #include <sys/cred.h> 347c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 357c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 367c478bd9Sstevel@tonic-gate #include <sys/stack.h> 377c478bd9Sstevel@tonic-gate #include <sys/debug.h> 387c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 397c478bd9Sstevel@tonic-gate #include <sys/sobject.h> 407c478bd9Sstevel@tonic-gate #include <sys/door.h> 417c478bd9Sstevel@tonic-gate #include <sys/modctl.h> 427c478bd9Sstevel@tonic-gate #include <sys/syscall.h> 437c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 447c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h> 457c478bd9Sstevel@tonic-gate #include <sys/mman.h> 467c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 477c478bd9Sstevel@tonic-gate #include <sys/swap.h> 487c478bd9Sstevel@tonic-gate #include <sys/lwp.h> 497c478bd9Sstevel@tonic-gate #include <sys/bitmap.h> 507c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 517c478bd9Sstevel@tonic-gate #include <sys/fcntl.h> 527c478bd9Sstevel@tonic-gate #include <vm/seg_kp.h> 537c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h> 547c478bd9Sstevel@tonic-gate #include <vm/as.h> 557c478bd9Sstevel@tonic-gate #include <fs/fs_subr.h> 567c478bd9Sstevel@tonic-gate 577c478bd9Sstevel@tonic-gate /* 587c478bd9Sstevel@tonic-gate * Page handling structures. This is set up as a list of per-page 597c478bd9Sstevel@tonic-gate * control structures (sc_page_ctl), with p->p_pagep pointing to 607c478bd9Sstevel@tonic-gate * the first. The per-page structures point to the actual pages 617c478bd9Sstevel@tonic-gate * and contain pointers to the user address for each mapped page. 627c478bd9Sstevel@tonic-gate * 637c478bd9Sstevel@tonic-gate * All data is protected by p->p_sc_lock. Since this lock is 647c478bd9Sstevel@tonic-gate * held while waiting for memory, schedctl_shared_alloc() should 657c478bd9Sstevel@tonic-gate * not be called while holding p_lock. 667c478bd9Sstevel@tonic-gate */ 677c478bd9Sstevel@tonic-gate 687c478bd9Sstevel@tonic-gate typedef struct sc_page_ctl { 697c478bd9Sstevel@tonic-gate struct sc_page_ctl *spc_next; 707c478bd9Sstevel@tonic-gate sc_shared_t *spc_base; /* base of kernel page */ 717c478bd9Sstevel@tonic-gate sc_shared_t *spc_end; /* end of usable space */ 727c478bd9Sstevel@tonic-gate ulong_t *spc_map; /* bitmap of allocated space on page */ 737c478bd9Sstevel@tonic-gate size_t spc_space; /* amount of space on page */ 747c478bd9Sstevel@tonic-gate caddr_t spc_uaddr; /* user-level address of the page */ 757c478bd9Sstevel@tonic-gate struct anon_map *spc_amp; /* anonymous memory structure */ 767c478bd9Sstevel@tonic-gate } sc_page_ctl_t; 777c478bd9Sstevel@tonic-gate 787c478bd9Sstevel@tonic-gate static size_t sc_pagesize; /* size of usable space on page */ 797c478bd9Sstevel@tonic-gate static size_t sc_bitmap_len; /* # of bits in allocation bitmap */ 807c478bd9Sstevel@tonic-gate static size_t sc_bitmap_words; /* # of words in allocation bitmap */ 817c478bd9Sstevel@tonic-gate 827c478bd9Sstevel@tonic-gate /* Context ops */ 837c478bd9Sstevel@tonic-gate static void schedctl_save(sc_shared_t *); 847c478bd9Sstevel@tonic-gate static void schedctl_restore(sc_shared_t *); 857c478bd9Sstevel@tonic-gate static void schedctl_fork(kthread_t *, kthread_t *); 867c478bd9Sstevel@tonic-gate 877c478bd9Sstevel@tonic-gate /* Functions for handling shared pages */ 887c478bd9Sstevel@tonic-gate static int schedctl_shared_alloc(sc_shared_t **, uintptr_t *); 897c478bd9Sstevel@tonic-gate static sc_page_ctl_t *schedctl_page_lookup(sc_shared_t *); 907c478bd9Sstevel@tonic-gate static int schedctl_map(struct anon_map *, caddr_t *, caddr_t); 917c478bd9Sstevel@tonic-gate static int schedctl_getpage(struct anon_map **, caddr_t *); 927c478bd9Sstevel@tonic-gate static void schedctl_freepage(struct anon_map *, caddr_t); 937c478bd9Sstevel@tonic-gate 947c478bd9Sstevel@tonic-gate /* 957c478bd9Sstevel@tonic-gate * System call interface to scheduler activations. 967c478bd9Sstevel@tonic-gate * This always operates on the current lwp. 977c478bd9Sstevel@tonic-gate */ 987c478bd9Sstevel@tonic-gate caddr_t 997c478bd9Sstevel@tonic-gate schedctl(void) 1007c478bd9Sstevel@tonic-gate { 1017c478bd9Sstevel@tonic-gate kthread_t *t = curthread; 1027c478bd9Sstevel@tonic-gate sc_shared_t *ssp; 1037c478bd9Sstevel@tonic-gate uintptr_t uaddr; 1047c478bd9Sstevel@tonic-gate int error; 1057c478bd9Sstevel@tonic-gate 1067c478bd9Sstevel@tonic-gate if (t->t_schedctl == NULL) { 1077c478bd9Sstevel@tonic-gate /* 1087c478bd9Sstevel@tonic-gate * Allocate and initialize the shared structure. 1097c478bd9Sstevel@tonic-gate */ 1107c478bd9Sstevel@tonic-gate if ((error = schedctl_shared_alloc(&ssp, &uaddr)) != 0) 1117c478bd9Sstevel@tonic-gate return ((caddr_t)(uintptr_t)set_errno(error)); 1127c478bd9Sstevel@tonic-gate bzero(ssp, sizeof (*ssp)); 1137c478bd9Sstevel@tonic-gate 1147c478bd9Sstevel@tonic-gate installctx(t, ssp, schedctl_save, schedctl_restore, 1157c478bd9Sstevel@tonic-gate schedctl_fork, NULL, NULL, NULL); 1167c478bd9Sstevel@tonic-gate 1177c478bd9Sstevel@tonic-gate thread_lock(t); /* protect against ts_tick and ts_update */ 1187c478bd9Sstevel@tonic-gate t->t_schedctl = ssp; 1197c478bd9Sstevel@tonic-gate t->t_sc_uaddr = uaddr; 120d4204c85Sraf ssp->sc_cid = t->t_cid; 121d4204c85Sraf ssp->sc_cpri = t->t_cpri; 122d4204c85Sraf ssp->sc_priority = DISP_PRIO(t); 1237c478bd9Sstevel@tonic-gate thread_unlock(t); 1247c478bd9Sstevel@tonic-gate } 1257c478bd9Sstevel@tonic-gate 1267c478bd9Sstevel@tonic-gate return ((caddr_t)t->t_sc_uaddr); 1277c478bd9Sstevel@tonic-gate } 1287c478bd9Sstevel@tonic-gate 1297c478bd9Sstevel@tonic-gate 1307c478bd9Sstevel@tonic-gate /* 1317c478bd9Sstevel@tonic-gate * Clean up scheduler activations state associated with an exiting 1327c478bd9Sstevel@tonic-gate * (or execing) lwp. t is always the current thread. 1337c478bd9Sstevel@tonic-gate */ 1347c478bd9Sstevel@tonic-gate void 1357c478bd9Sstevel@tonic-gate schedctl_lwp_cleanup(kthread_t *t) 1367c478bd9Sstevel@tonic-gate { 1377c478bd9Sstevel@tonic-gate sc_shared_t *ssp = t->t_schedctl; 1387c478bd9Sstevel@tonic-gate proc_t *p = ttoproc(t); 1397c478bd9Sstevel@tonic-gate sc_page_ctl_t *pagep; 1407c478bd9Sstevel@tonic-gate index_t index; 1417c478bd9Sstevel@tonic-gate 1427c478bd9Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 1437c478bd9Sstevel@tonic-gate 1447c478bd9Sstevel@tonic-gate thread_lock(t); /* protect against ts_tick and ts_update */ 1457c478bd9Sstevel@tonic-gate t->t_schedctl = NULL; 1467c478bd9Sstevel@tonic-gate t->t_sc_uaddr = 0; 1477c478bd9Sstevel@tonic-gate thread_unlock(t); 1487c478bd9Sstevel@tonic-gate 1497c478bd9Sstevel@tonic-gate /* 1507c478bd9Sstevel@tonic-gate * Remove the context op to avoid the final call to 1517c478bd9Sstevel@tonic-gate * schedctl_save when switching away from this lwp. 1527c478bd9Sstevel@tonic-gate */ 1537c478bd9Sstevel@tonic-gate (void) removectx(t, ssp, schedctl_save, schedctl_restore, 1547c478bd9Sstevel@tonic-gate schedctl_fork, NULL, NULL, NULL); 1557c478bd9Sstevel@tonic-gate 1567c478bd9Sstevel@tonic-gate /* 1577c478bd9Sstevel@tonic-gate * Do not unmap the shared page until the process exits. 1587c478bd9Sstevel@tonic-gate * User-level library code relies on this for adaptive mutex locking. 1597c478bd9Sstevel@tonic-gate */ 1607c478bd9Sstevel@tonic-gate mutex_enter(&p->p_sc_lock); 1617c478bd9Sstevel@tonic-gate ssp->sc_state = SC_FREE; 1627c478bd9Sstevel@tonic-gate pagep = schedctl_page_lookup(ssp); 1637c478bd9Sstevel@tonic-gate index = (index_t)(ssp - pagep->spc_base); 1647c478bd9Sstevel@tonic-gate BT_CLEAR(pagep->spc_map, index); 1657c478bd9Sstevel@tonic-gate pagep->spc_space += sizeof (sc_shared_t); 1667c478bd9Sstevel@tonic-gate mutex_exit(&p->p_sc_lock); 1677c478bd9Sstevel@tonic-gate } 1687c478bd9Sstevel@tonic-gate 169a574db85Sraf 1707c478bd9Sstevel@tonic-gate /* 1717c478bd9Sstevel@tonic-gate * Cleanup the list of schedctl shared pages for the process. 1727c478bd9Sstevel@tonic-gate * Called from exec() and exit() system calls. 1737c478bd9Sstevel@tonic-gate */ 1747c478bd9Sstevel@tonic-gate void 175a574db85Sraf schedctl_proc_cleanup(void) 1767c478bd9Sstevel@tonic-gate { 1777c478bd9Sstevel@tonic-gate proc_t *p = curproc; 1787c478bd9Sstevel@tonic-gate sc_page_ctl_t *pagep; 1797c478bd9Sstevel@tonic-gate sc_page_ctl_t *next; 1807c478bd9Sstevel@tonic-gate 1817c478bd9Sstevel@tonic-gate ASSERT(p->p_lwpcnt == 1); /* we are single-threaded now */ 1827c478bd9Sstevel@tonic-gate ASSERT(curthread->t_schedctl == NULL); 1837c478bd9Sstevel@tonic-gate 1847c478bd9Sstevel@tonic-gate /* 1857c478bd9Sstevel@tonic-gate * Since we are single-threaded, we don't have to hold p->p_sc_lock. 1867c478bd9Sstevel@tonic-gate */ 1877c478bd9Sstevel@tonic-gate pagep = p->p_pagep; 1887c478bd9Sstevel@tonic-gate p->p_pagep = NULL; 1897c478bd9Sstevel@tonic-gate while (pagep != NULL) { 1907c478bd9Sstevel@tonic-gate ASSERT(pagep->spc_space == sc_pagesize); 1917c478bd9Sstevel@tonic-gate next = pagep->spc_next; 1927c478bd9Sstevel@tonic-gate /* 1937c478bd9Sstevel@tonic-gate * Unmap the user space and free the mapping structure. 1947c478bd9Sstevel@tonic-gate */ 1957c478bd9Sstevel@tonic-gate (void) as_unmap(p->p_as, pagep->spc_uaddr, PAGESIZE); 1967c478bd9Sstevel@tonic-gate schedctl_freepage(pagep->spc_amp, (caddr_t)(pagep->spc_base)); 1977c478bd9Sstevel@tonic-gate kmem_free(pagep->spc_map, sizeof (ulong_t) * sc_bitmap_words); 1987c478bd9Sstevel@tonic-gate kmem_free(pagep, sizeof (sc_page_ctl_t)); 1997c478bd9Sstevel@tonic-gate pagep = next; 2007c478bd9Sstevel@tonic-gate } 2017c478bd9Sstevel@tonic-gate } 2027c478bd9Sstevel@tonic-gate 203a574db85Sraf 2047c478bd9Sstevel@tonic-gate /* 2057c478bd9Sstevel@tonic-gate * Called by resume just before switching away from the current thread. 2067c478bd9Sstevel@tonic-gate * Save new thread state. 2077c478bd9Sstevel@tonic-gate */ 208d4204c85Sraf static void 2097c478bd9Sstevel@tonic-gate schedctl_save(sc_shared_t *ssp) 2107c478bd9Sstevel@tonic-gate { 2117c478bd9Sstevel@tonic-gate ssp->sc_state = curthread->t_state; 2127c478bd9Sstevel@tonic-gate } 2137c478bd9Sstevel@tonic-gate 2147c478bd9Sstevel@tonic-gate 2157c478bd9Sstevel@tonic-gate /* 2167c478bd9Sstevel@tonic-gate * Called by resume after switching to the current thread. 2177c478bd9Sstevel@tonic-gate * Save new thread state and CPU. 2187c478bd9Sstevel@tonic-gate */ 219d4204c85Sraf static void 2207c478bd9Sstevel@tonic-gate schedctl_restore(sc_shared_t *ssp) 2217c478bd9Sstevel@tonic-gate { 2227c478bd9Sstevel@tonic-gate ssp->sc_state = SC_ONPROC; 2237c478bd9Sstevel@tonic-gate ssp->sc_cpu = CPU->cpu_id; 2247c478bd9Sstevel@tonic-gate } 2257c478bd9Sstevel@tonic-gate 2267c478bd9Sstevel@tonic-gate 2277c478bd9Sstevel@tonic-gate /* 2287c478bd9Sstevel@tonic-gate * On fork, remove inherited mappings from the child's address space. 2297c478bd9Sstevel@tonic-gate * The child's threads must call schedctl() to get new shared mappings. 2307c478bd9Sstevel@tonic-gate */ 231d4204c85Sraf static void 2327c478bd9Sstevel@tonic-gate schedctl_fork(kthread_t *pt, kthread_t *ct) 2337c478bd9Sstevel@tonic-gate { 2347c478bd9Sstevel@tonic-gate proc_t *pp = ttoproc(pt); 2357c478bd9Sstevel@tonic-gate proc_t *cp = ttoproc(ct); 2367c478bd9Sstevel@tonic-gate sc_page_ctl_t *pagep; 2377c478bd9Sstevel@tonic-gate 2387c478bd9Sstevel@tonic-gate ASSERT(ct->t_schedctl == NULL); 2397c478bd9Sstevel@tonic-gate 2407c478bd9Sstevel@tonic-gate /* 2417c478bd9Sstevel@tonic-gate * Do this only once, whether we are doing fork1() or forkall(). 2427c478bd9Sstevel@tonic-gate * Don't do it at all if the child process is a child of vfork() 2437c478bd9Sstevel@tonic-gate * because a child of vfork() borrows the parent's address space. 2447c478bd9Sstevel@tonic-gate */ 2457c478bd9Sstevel@tonic-gate if (pt != curthread || (cp->p_flag & SVFORK)) 2467c478bd9Sstevel@tonic-gate return; 2477c478bd9Sstevel@tonic-gate 2487c478bd9Sstevel@tonic-gate mutex_enter(&pp->p_sc_lock); 2497c478bd9Sstevel@tonic-gate for (pagep = pp->p_pagep; pagep != NULL; pagep = pagep->spc_next) 2507c478bd9Sstevel@tonic-gate (void) as_unmap(cp->p_as, pagep->spc_uaddr, PAGESIZE); 2517c478bd9Sstevel@tonic-gate mutex_exit(&pp->p_sc_lock); 2527c478bd9Sstevel@tonic-gate } 2537c478bd9Sstevel@tonic-gate 254a574db85Sraf 2557c478bd9Sstevel@tonic-gate /* 2567c478bd9Sstevel@tonic-gate * Returns non-zero if the specified thread shouldn't be preempted at this time. 257d4204c85Sraf * Called by ts_preempt(), ts_tick(), and ts_update(). 2587c478bd9Sstevel@tonic-gate */ 2597c478bd9Sstevel@tonic-gate int 2607c478bd9Sstevel@tonic-gate schedctl_get_nopreempt(kthread_t *t) 2617c478bd9Sstevel@tonic-gate { 2627c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(t)); 2637c478bd9Sstevel@tonic-gate return (t->t_schedctl->sc_preemptctl.sc_nopreempt); 2647c478bd9Sstevel@tonic-gate } 2657c478bd9Sstevel@tonic-gate 2667c478bd9Sstevel@tonic-gate 2677c478bd9Sstevel@tonic-gate /* 2687c478bd9Sstevel@tonic-gate * Sets the value of the nopreempt field for the specified thread. 269d4204c85Sraf * Called by ts_preempt() to clear the field on preemption. 2707c478bd9Sstevel@tonic-gate */ 2717c478bd9Sstevel@tonic-gate void 2727c478bd9Sstevel@tonic-gate schedctl_set_nopreempt(kthread_t *t, short val) 2737c478bd9Sstevel@tonic-gate { 2747c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(t)); 2757c478bd9Sstevel@tonic-gate t->t_schedctl->sc_preemptctl.sc_nopreempt = val; 2767c478bd9Sstevel@tonic-gate } 2777c478bd9Sstevel@tonic-gate 2787c478bd9Sstevel@tonic-gate 2797c478bd9Sstevel@tonic-gate /* 280d4204c85Sraf * Sets the value of the yield field for the specified thread. 281d4204c85Sraf * Called by ts_preempt() and ts_tick() to set the field, and 282d4204c85Sraf * ts_yield() to clear it. 283d4204c85Sraf * The kernel never looks at this field so we don't need a 284d4204c85Sraf * schedctl_get_yield() function. 2857c478bd9Sstevel@tonic-gate */ 2867c478bd9Sstevel@tonic-gate void 2877c478bd9Sstevel@tonic-gate schedctl_set_yield(kthread_t *t, short val) 2887c478bd9Sstevel@tonic-gate { 2897c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(t)); 2907c478bd9Sstevel@tonic-gate t->t_schedctl->sc_preemptctl.sc_yield = val; 2917c478bd9Sstevel@tonic-gate } 2927c478bd9Sstevel@tonic-gate 2937c478bd9Sstevel@tonic-gate 2947c478bd9Sstevel@tonic-gate /* 295d4204c85Sraf * Sets the values of the cid and priority fields for the specified thread. 296d4204c85Sraf * Called from thread_change_pri(), thread_change_epri(), THREAD_CHANGE_PRI(). 297d4204c85Sraf * Called following calls to CL_FORKRET() and CL_ENTERCLASS(). 298d4204c85Sraf */ 299d4204c85Sraf void 300d4204c85Sraf schedctl_set_cidpri(kthread_t *t) 301d4204c85Sraf { 302d4204c85Sraf sc_shared_t *tdp = t->t_schedctl; 303d4204c85Sraf 304d4204c85Sraf if (tdp != NULL) { 305d4204c85Sraf tdp->sc_cid = t->t_cid; 306d4204c85Sraf tdp->sc_cpri = t->t_cpri; 307d4204c85Sraf tdp->sc_priority = DISP_PRIO(t); 308d4204c85Sraf } 309d4204c85Sraf } 310d4204c85Sraf 311d4204c85Sraf 312d4204c85Sraf /* 3137c478bd9Sstevel@tonic-gate * Returns non-zero if the specified thread has requested that all 3147c478bd9Sstevel@tonic-gate * signals be blocked. Called by signal-related code that tests 3157c478bd9Sstevel@tonic-gate * the signal mask of a thread that may not be the current thread 3167c478bd9Sstevel@tonic-gate * and where the process's p_lock cannot be acquired. 3177c478bd9Sstevel@tonic-gate */ 3187c478bd9Sstevel@tonic-gate int 3197c478bd9Sstevel@tonic-gate schedctl_sigblock(kthread_t *t) 3207c478bd9Sstevel@tonic-gate { 3217c478bd9Sstevel@tonic-gate sc_shared_t *tdp = t->t_schedctl; 3227c478bd9Sstevel@tonic-gate 32347eb4d1eSsl108498 if (tdp != NULL) 3247c478bd9Sstevel@tonic-gate return (tdp->sc_sigblock); 3257c478bd9Sstevel@tonic-gate return (0); 3267c478bd9Sstevel@tonic-gate } 3277c478bd9Sstevel@tonic-gate 3287c478bd9Sstevel@tonic-gate 3297c478bd9Sstevel@tonic-gate /* 3307c478bd9Sstevel@tonic-gate * If the sc_sigblock field is set for the specified thread, set 3317c478bd9Sstevel@tonic-gate * its signal mask to block all maskable signals, then clear the 3327c478bd9Sstevel@tonic-gate * sc_sigblock field. This finishes what user-level code requested 3337c478bd9Sstevel@tonic-gate * to be done when it set tdp->sc_shared->sc_sigblock non-zero. 3347be238fcSRoger A. Faulkner * Called from signal-related code either by the current thread for 3357be238fcSRoger A. Faulkner * itself or by a thread that holds the process's p_lock (/proc code). 3367c478bd9Sstevel@tonic-gate */ 3377c478bd9Sstevel@tonic-gate void 3387c478bd9Sstevel@tonic-gate schedctl_finish_sigblock(kthread_t *t) 3397c478bd9Sstevel@tonic-gate { 3407c478bd9Sstevel@tonic-gate sc_shared_t *tdp = t->t_schedctl; 3417c478bd9Sstevel@tonic-gate 3427be238fcSRoger A. Faulkner ASSERT(t == curthread || MUTEX_HELD(&ttoproc(t)->p_lock)); 3437c478bd9Sstevel@tonic-gate 34447eb4d1eSsl108498 if (tdp != NULL && tdp->sc_sigblock) { 3457c478bd9Sstevel@tonic-gate t->t_hold.__sigbits[0] = FILLSET0 & ~CANTMASK0; 3467c478bd9Sstevel@tonic-gate t->t_hold.__sigbits[1] = FILLSET1 & ~CANTMASK1; 347*bdf0047cSRoger A. Faulkner t->t_hold.__sigbits[2] = FILLSET2 & ~CANTMASK2; 3487c478bd9Sstevel@tonic-gate tdp->sc_sigblock = 0; 3497c478bd9Sstevel@tonic-gate } 3507c478bd9Sstevel@tonic-gate } 3517c478bd9Sstevel@tonic-gate 3527c478bd9Sstevel@tonic-gate 3537c478bd9Sstevel@tonic-gate /* 354a574db85Sraf * Return non-zero if the current thread has declared that it has 355a574db85Sraf * a cancellation pending and that cancellation is not disabled. 356a574db85Sraf * If SIGCANCEL is blocked, we must be going over the wire in an 357a574db85Sraf * NFS transaction (sigintr() was called); return zero in this case. 3587c478bd9Sstevel@tonic-gate */ 3597c478bd9Sstevel@tonic-gate int 360a574db85Sraf schedctl_cancel_pending(void) 361a574db85Sraf { 362a574db85Sraf sc_shared_t *tdp = curthread->t_schedctl; 363a574db85Sraf 364a574db85Sraf if (tdp != NULL && 365a574db85Sraf (tdp->sc_flgs & SC_CANCEL_FLG) && 366a574db85Sraf !tdp->sc_sigblock && 367a574db85Sraf !sigismember(&curthread->t_hold, SIGCANCEL)) 368a574db85Sraf return (1); 369a574db85Sraf return (0); 370a574db85Sraf } 371a574db85Sraf 372a574db85Sraf 373a574db85Sraf /* 374a574db85Sraf * Inform libc that the kernel returned EINTR from some system call 375a574db85Sraf * due to there being a cancellation pending (SC_CANCEL_FLG set or 376a574db85Sraf * we received an SI_LWP SIGCANCEL while in a system call), rather 377a574db85Sraf * than because of some other signal. User-level code can try to 378a574db85Sraf * recover from receiving other signals, but it can't recover from 379a574db85Sraf * being cancelled. 380a574db85Sraf */ 381a574db85Sraf void 382a574db85Sraf schedctl_cancel_eintr(void) 3837c478bd9Sstevel@tonic-gate { 3847c478bd9Sstevel@tonic-gate sc_shared_t *tdp = curthread->t_schedctl; 3857c478bd9Sstevel@tonic-gate 38647eb4d1eSsl108498 if (tdp != NULL) 387a574db85Sraf tdp->sc_flgs |= SC_EINTR_FLG; 388a574db85Sraf } 389a574db85Sraf 390a574db85Sraf 391a574db85Sraf /* 392a574db85Sraf * Return non-zero if the current thread has declared that 393a574db85Sraf * it is calling into the kernel to park, else return zero. 394a574db85Sraf */ 395a574db85Sraf int 396a574db85Sraf schedctl_is_park(void) 397a574db85Sraf { 398a574db85Sraf sc_shared_t *tdp = curthread->t_schedctl; 399a574db85Sraf 400a574db85Sraf if (tdp != NULL) 401a574db85Sraf return ((tdp->sc_flgs & SC_PARK_FLG) != 0); 4027c478bd9Sstevel@tonic-gate /* 4037c478bd9Sstevel@tonic-gate * If we're here and there is no shared memory (how could 4047c478bd9Sstevel@tonic-gate * that happen?) then just assume we really are here to park. 4057c478bd9Sstevel@tonic-gate */ 4067c478bd9Sstevel@tonic-gate return (1); 4077c478bd9Sstevel@tonic-gate } 4087c478bd9Sstevel@tonic-gate 409a574db85Sraf 41047eb4d1eSsl108498 /* 41147eb4d1eSsl108498 * Declare thread is parking. 41247eb4d1eSsl108498 * 413a574db85Sraf * libc will set "sc_flgs |= SC_PARK_FLG" before calling lwpsys_park(0, tid) 414a574db85Sraf * in order to declare that the thread is calling into the kernel to park. 41547eb4d1eSsl108498 * 41647eb4d1eSsl108498 * This interface exists ONLY to support older versions of libthread which 417a574db85Sraf * are not aware of the SC_PARK_FLG flag. 41847eb4d1eSsl108498 * 419a574db85Sraf * Older versions of libthread which are not aware of the SC_PARK_FLG flag 420a574db85Sraf * need to be modified or emulated to call lwpsys_park(4, ...) instead of 42147eb4d1eSsl108498 * lwpsys_park(0, ...). This will invoke schedctl_set_park() before 42247eb4d1eSsl108498 * lwp_park() to declare that the thread is parking. 42347eb4d1eSsl108498 */ 42447eb4d1eSsl108498 void 425a574db85Sraf schedctl_set_park(void) 42647eb4d1eSsl108498 { 42747eb4d1eSsl108498 sc_shared_t *tdp = curthread->t_schedctl; 42847eb4d1eSsl108498 if (tdp != NULL) 429a574db85Sraf tdp->sc_flgs |= SC_PARK_FLG; 43047eb4d1eSsl108498 } 4317c478bd9Sstevel@tonic-gate 432a574db85Sraf 4337c478bd9Sstevel@tonic-gate /* 434a574db85Sraf * Clear the parking flag on return from parking in the kernel. 4357c478bd9Sstevel@tonic-gate */ 4367c478bd9Sstevel@tonic-gate void 437a574db85Sraf schedctl_unpark(void) 4387c478bd9Sstevel@tonic-gate { 4397c478bd9Sstevel@tonic-gate sc_shared_t *tdp = curthread->t_schedctl; 4407c478bd9Sstevel@tonic-gate 44147eb4d1eSsl108498 if (tdp != NULL) 442a574db85Sraf tdp->sc_flgs &= ~SC_PARK_FLG; 4437c478bd9Sstevel@tonic-gate } 4447c478bd9Sstevel@tonic-gate 4457c478bd9Sstevel@tonic-gate 4467c478bd9Sstevel@tonic-gate /* 4477c478bd9Sstevel@tonic-gate * Page handling code. 4487c478bd9Sstevel@tonic-gate */ 4497c478bd9Sstevel@tonic-gate 4507c478bd9Sstevel@tonic-gate void 451a574db85Sraf schedctl_init(void) 4527c478bd9Sstevel@tonic-gate { 4537c478bd9Sstevel@tonic-gate /* 4547c478bd9Sstevel@tonic-gate * Amount of page that can hold sc_shared_t structures. If 4557c478bd9Sstevel@tonic-gate * sizeof (sc_shared_t) is a power of 2, this should just be 4567c478bd9Sstevel@tonic-gate * PAGESIZE. 4577c478bd9Sstevel@tonic-gate */ 4587c478bd9Sstevel@tonic-gate sc_pagesize = PAGESIZE - (PAGESIZE % sizeof (sc_shared_t)); 4597c478bd9Sstevel@tonic-gate 4607c478bd9Sstevel@tonic-gate /* 4617c478bd9Sstevel@tonic-gate * Allocation bitmap is one bit per struct on a page. 4627c478bd9Sstevel@tonic-gate */ 4637c478bd9Sstevel@tonic-gate sc_bitmap_len = sc_pagesize / sizeof (sc_shared_t); 4647c478bd9Sstevel@tonic-gate sc_bitmap_words = howmany(sc_bitmap_len, BT_NBIPUL); 4657c478bd9Sstevel@tonic-gate } 4667c478bd9Sstevel@tonic-gate 467a574db85Sraf 468d4204c85Sraf static int 4697c478bd9Sstevel@tonic-gate schedctl_shared_alloc(sc_shared_t **kaddrp, uintptr_t *uaddrp) 4707c478bd9Sstevel@tonic-gate { 4717c478bd9Sstevel@tonic-gate proc_t *p = curproc; 4727c478bd9Sstevel@tonic-gate sc_page_ctl_t *pagep; 4737c478bd9Sstevel@tonic-gate sc_shared_t *ssp; 4747c478bd9Sstevel@tonic-gate caddr_t base; 4757c478bd9Sstevel@tonic-gate index_t index; 4767c478bd9Sstevel@tonic-gate int error; 4777c478bd9Sstevel@tonic-gate 4787c478bd9Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 4797c478bd9Sstevel@tonic-gate mutex_enter(&p->p_sc_lock); 4807c478bd9Sstevel@tonic-gate 4817c478bd9Sstevel@tonic-gate /* 4827c478bd9Sstevel@tonic-gate * Try to find space for the new data in existing pages 4837c478bd9Sstevel@tonic-gate * within the process's list of shared pages. 4847c478bd9Sstevel@tonic-gate */ 4857c478bd9Sstevel@tonic-gate for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next) 4867c478bd9Sstevel@tonic-gate if (pagep->spc_space != 0) 4877c478bd9Sstevel@tonic-gate break; 4887c478bd9Sstevel@tonic-gate 4897c478bd9Sstevel@tonic-gate if (pagep != NULL) 4907c478bd9Sstevel@tonic-gate base = pagep->spc_uaddr; 4917c478bd9Sstevel@tonic-gate else { 4927c478bd9Sstevel@tonic-gate struct anon_map *amp; 4937c478bd9Sstevel@tonic-gate caddr_t kaddr; 4947c478bd9Sstevel@tonic-gate 4957c478bd9Sstevel@tonic-gate /* 4967c478bd9Sstevel@tonic-gate * No room, need to allocate a new page. Also set up 4977c478bd9Sstevel@tonic-gate * a mapping to the kernel address space for the new 4987c478bd9Sstevel@tonic-gate * page and lock it in memory. 4997c478bd9Sstevel@tonic-gate */ 5007c478bd9Sstevel@tonic-gate if ((error = schedctl_getpage(&, &kaddr)) != 0) { 5017c478bd9Sstevel@tonic-gate mutex_exit(&p->p_sc_lock); 5027c478bd9Sstevel@tonic-gate return (error); 5037c478bd9Sstevel@tonic-gate } 5047c478bd9Sstevel@tonic-gate if ((error = schedctl_map(amp, &base, kaddr)) != 0) { 5057c478bd9Sstevel@tonic-gate schedctl_freepage(amp, kaddr); 5067c478bd9Sstevel@tonic-gate mutex_exit(&p->p_sc_lock); 5077c478bd9Sstevel@tonic-gate return (error); 5087c478bd9Sstevel@tonic-gate } 5097c478bd9Sstevel@tonic-gate 5107c478bd9Sstevel@tonic-gate /* 5117c478bd9Sstevel@tonic-gate * Allocate and initialize the page control structure. 5127c478bd9Sstevel@tonic-gate */ 5137c478bd9Sstevel@tonic-gate pagep = kmem_alloc(sizeof (sc_page_ctl_t), KM_SLEEP); 5147c478bd9Sstevel@tonic-gate pagep->spc_amp = amp; 5157c478bd9Sstevel@tonic-gate pagep->spc_base = (sc_shared_t *)kaddr; 5167c478bd9Sstevel@tonic-gate pagep->spc_end = (sc_shared_t *)(kaddr + sc_pagesize); 5177c478bd9Sstevel@tonic-gate pagep->spc_uaddr = base; 5187c478bd9Sstevel@tonic-gate 5197c478bd9Sstevel@tonic-gate pagep->spc_map = kmem_zalloc(sizeof (ulong_t) * sc_bitmap_words, 5207c478bd9Sstevel@tonic-gate KM_SLEEP); 5217c478bd9Sstevel@tonic-gate pagep->spc_space = sc_pagesize; 5227c478bd9Sstevel@tonic-gate 5237c478bd9Sstevel@tonic-gate pagep->spc_next = p->p_pagep; 5247c478bd9Sstevel@tonic-gate p->p_pagep = pagep; 5257c478bd9Sstevel@tonic-gate } 5267c478bd9Sstevel@tonic-gate 5277c478bd9Sstevel@tonic-gate /* 5287c478bd9Sstevel@tonic-gate * Got a page, now allocate space for the data. There should 5297c478bd9Sstevel@tonic-gate * be space unless something's wrong. 5307c478bd9Sstevel@tonic-gate */ 5317c478bd9Sstevel@tonic-gate ASSERT(pagep != NULL && pagep->spc_space >= sizeof (sc_shared_t)); 5327c478bd9Sstevel@tonic-gate index = bt_availbit(pagep->spc_map, sc_bitmap_len); 5337c478bd9Sstevel@tonic-gate ASSERT(index != -1); 5347c478bd9Sstevel@tonic-gate 5357c478bd9Sstevel@tonic-gate /* 5367c478bd9Sstevel@tonic-gate * Get location with pointer arithmetic. spc_base is of type 5377c478bd9Sstevel@tonic-gate * sc_shared_t *. Mark as allocated. 5387c478bd9Sstevel@tonic-gate */ 5397c478bd9Sstevel@tonic-gate ssp = pagep->spc_base + index; 5407c478bd9Sstevel@tonic-gate BT_SET(pagep->spc_map, index); 5417c478bd9Sstevel@tonic-gate pagep->spc_space -= sizeof (sc_shared_t); 5427c478bd9Sstevel@tonic-gate 5437c478bd9Sstevel@tonic-gate mutex_exit(&p->p_sc_lock); 5447c478bd9Sstevel@tonic-gate 5457c478bd9Sstevel@tonic-gate /* 5467c478bd9Sstevel@tonic-gate * Return kernel and user addresses. 5477c478bd9Sstevel@tonic-gate */ 5487c478bd9Sstevel@tonic-gate *kaddrp = ssp; 5497c478bd9Sstevel@tonic-gate *uaddrp = (uintptr_t)base + ((uintptr_t)ssp & PAGEOFFSET); 5507c478bd9Sstevel@tonic-gate return (0); 5517c478bd9Sstevel@tonic-gate } 5527c478bd9Sstevel@tonic-gate 5537c478bd9Sstevel@tonic-gate 5547c478bd9Sstevel@tonic-gate /* 5557c478bd9Sstevel@tonic-gate * Find the page control structure corresponding to a kernel address. 5567c478bd9Sstevel@tonic-gate */ 5577c478bd9Sstevel@tonic-gate static sc_page_ctl_t * 5587c478bd9Sstevel@tonic-gate schedctl_page_lookup(sc_shared_t *ssp) 5597c478bd9Sstevel@tonic-gate { 5607c478bd9Sstevel@tonic-gate proc_t *p = curproc; 5617c478bd9Sstevel@tonic-gate sc_page_ctl_t *pagep; 5627c478bd9Sstevel@tonic-gate 5637c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_sc_lock)); 5647c478bd9Sstevel@tonic-gate for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next) { 5657c478bd9Sstevel@tonic-gate if (ssp >= pagep->spc_base && ssp < pagep->spc_end) 5667c478bd9Sstevel@tonic-gate return (pagep); 5677c478bd9Sstevel@tonic-gate } 5687c478bd9Sstevel@tonic-gate return (NULL); /* This "can't happen". Should we panic? */ 5697c478bd9Sstevel@tonic-gate } 5707c478bd9Sstevel@tonic-gate 5717c478bd9Sstevel@tonic-gate 5727c478bd9Sstevel@tonic-gate /* 5737c478bd9Sstevel@tonic-gate * This function is called when a page needs to be mapped into a 5747c478bd9Sstevel@tonic-gate * process's address space. Allocate the user address space and 5757c478bd9Sstevel@tonic-gate * set up the mapping to the page. Assumes the page has already 5767c478bd9Sstevel@tonic-gate * been allocated and locked in memory via schedctl_getpage. 5777c478bd9Sstevel@tonic-gate */ 5787c478bd9Sstevel@tonic-gate static int 5797c478bd9Sstevel@tonic-gate schedctl_map(struct anon_map *amp, caddr_t *uaddrp, caddr_t kaddr) 5807c478bd9Sstevel@tonic-gate { 58160946fe0Smec caddr_t addr = NULL; 5827c478bd9Sstevel@tonic-gate struct as *as = curproc->p_as; 5837c478bd9Sstevel@tonic-gate struct segvn_crargs vn_a; 5847c478bd9Sstevel@tonic-gate int error; 5857c478bd9Sstevel@tonic-gate 5867c478bd9Sstevel@tonic-gate as_rangelock(as); 5877c478bd9Sstevel@tonic-gate /* pass address of kernel mapping as offset to avoid VAC conflicts */ 5887c478bd9Sstevel@tonic-gate map_addr(&addr, PAGESIZE, (offset_t)(uintptr_t)kaddr, 1, 0); 5897c478bd9Sstevel@tonic-gate if (addr == NULL) { 5907c478bd9Sstevel@tonic-gate as_rangeunlock(as); 5917c478bd9Sstevel@tonic-gate return (ENOMEM); 5927c478bd9Sstevel@tonic-gate } 5937c478bd9Sstevel@tonic-gate 5947c478bd9Sstevel@tonic-gate /* 5957c478bd9Sstevel@tonic-gate * Use segvn to set up the mapping to the page. 5967c478bd9Sstevel@tonic-gate */ 5977c478bd9Sstevel@tonic-gate vn_a.vp = NULL; 5987c478bd9Sstevel@tonic-gate vn_a.offset = 0; 5997c478bd9Sstevel@tonic-gate vn_a.cred = NULL; 6007c478bd9Sstevel@tonic-gate vn_a.type = MAP_SHARED; 6017c478bd9Sstevel@tonic-gate vn_a.prot = vn_a.maxprot = PROT_ALL; 6027c478bd9Sstevel@tonic-gate vn_a.flags = 0; 6037c478bd9Sstevel@tonic-gate vn_a.amp = amp; 6047c478bd9Sstevel@tonic-gate vn_a.szc = 0; 6057c478bd9Sstevel@tonic-gate vn_a.lgrp_mem_policy_flags = 0; 6067c478bd9Sstevel@tonic-gate error = as_map(as, addr, PAGESIZE, segvn_create, &vn_a); 6077c478bd9Sstevel@tonic-gate as_rangeunlock(as); 6087c478bd9Sstevel@tonic-gate 6097c478bd9Sstevel@tonic-gate if (error) 6107c478bd9Sstevel@tonic-gate return (error); 6117c478bd9Sstevel@tonic-gate 6127c478bd9Sstevel@tonic-gate *uaddrp = addr; 6137c478bd9Sstevel@tonic-gate return (0); 6147c478bd9Sstevel@tonic-gate } 6157c478bd9Sstevel@tonic-gate 6167c478bd9Sstevel@tonic-gate 6177c478bd9Sstevel@tonic-gate /* 6187c478bd9Sstevel@tonic-gate * Allocate a new page from anonymous memory. Also, create a kernel 6197c478bd9Sstevel@tonic-gate * mapping to the page and lock the page in memory. 6207c478bd9Sstevel@tonic-gate */ 6217c478bd9Sstevel@tonic-gate static int 6227c478bd9Sstevel@tonic-gate schedctl_getpage(struct anon_map **newamp, caddr_t *newaddr) 6237c478bd9Sstevel@tonic-gate { 6247c478bd9Sstevel@tonic-gate struct anon_map *amp; 6257c478bd9Sstevel@tonic-gate caddr_t kaddr; 6267c478bd9Sstevel@tonic-gate 6277c478bd9Sstevel@tonic-gate /* 6287c478bd9Sstevel@tonic-gate * Set up anonymous memory struct. No swap reservation is 6297c478bd9Sstevel@tonic-gate * needed since the page will be locked into memory. 6307c478bd9Sstevel@tonic-gate */ 6312cb27123Saguzovsk amp = anonmap_alloc(PAGESIZE, 0, ANON_SLEEP); 6327c478bd9Sstevel@tonic-gate 6337c478bd9Sstevel@tonic-gate /* 6347c478bd9Sstevel@tonic-gate * Allocate the page. 6357c478bd9Sstevel@tonic-gate */ 6360209230bSgjelinek kaddr = segkp_get_withanonmap(segkp, PAGESIZE, 6370209230bSgjelinek KPD_NO_ANON | KPD_LOCKED | KPD_ZERO, amp); 6387c478bd9Sstevel@tonic-gate if (kaddr == NULL) { 6397c478bd9Sstevel@tonic-gate amp->refcnt--; 6407c478bd9Sstevel@tonic-gate anonmap_free(amp); 6417c478bd9Sstevel@tonic-gate return (ENOMEM); 6427c478bd9Sstevel@tonic-gate } 6437c478bd9Sstevel@tonic-gate 6447c478bd9Sstevel@tonic-gate /* 6457c478bd9Sstevel@tonic-gate * The page is left SE_SHARED locked so that it won't be 6467c478bd9Sstevel@tonic-gate * paged out or relocated (KPD_LOCKED above). 6477c478bd9Sstevel@tonic-gate */ 6487c478bd9Sstevel@tonic-gate 6497c478bd9Sstevel@tonic-gate *newamp = amp; 6507c478bd9Sstevel@tonic-gate *newaddr = kaddr; 6517c478bd9Sstevel@tonic-gate return (0); 6527c478bd9Sstevel@tonic-gate } 6537c478bd9Sstevel@tonic-gate 6547c478bd9Sstevel@tonic-gate 6557c478bd9Sstevel@tonic-gate /* 6567c478bd9Sstevel@tonic-gate * Take the necessary steps to allow a page to be released. 6577c478bd9Sstevel@tonic-gate * This is called when the process is doing exit() or exec(). 6587c478bd9Sstevel@tonic-gate * There should be no accesses to the page after this. 6597c478bd9Sstevel@tonic-gate * The kernel mapping of the page is released and the page is unlocked. 6607c478bd9Sstevel@tonic-gate */ 6617c478bd9Sstevel@tonic-gate static void 6627c478bd9Sstevel@tonic-gate schedctl_freepage(struct anon_map *amp, caddr_t kaddr) 6637c478bd9Sstevel@tonic-gate { 6647c478bd9Sstevel@tonic-gate /* 6657c478bd9Sstevel@tonic-gate * Release the lock on the page and remove the kernel mapping. 6667c478bd9Sstevel@tonic-gate */ 6677c478bd9Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); 6687c478bd9Sstevel@tonic-gate segkp_release(segkp, kaddr); 6697c478bd9Sstevel@tonic-gate 6707c478bd9Sstevel@tonic-gate /* 6717c478bd9Sstevel@tonic-gate * Decrement the refcnt so the anon_map structure will be freed. 6727c478bd9Sstevel@tonic-gate */ 6737c478bd9Sstevel@tonic-gate if (--amp->refcnt == 0) { 6747c478bd9Sstevel@tonic-gate /* 6757c478bd9Sstevel@tonic-gate * The current process no longer has the page mapped, so 6767c478bd9Sstevel@tonic-gate * we have to free everything rather than letting as_free 6777c478bd9Sstevel@tonic-gate * do the work. 6787c478bd9Sstevel@tonic-gate */ 679a98e9dbfSaguzovsk anonmap_purge(amp); 6807c478bd9Sstevel@tonic-gate anon_free(amp->ahp, 0, PAGESIZE); 6817c478bd9Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 6827c478bd9Sstevel@tonic-gate anonmap_free(amp); 6837c478bd9Sstevel@tonic-gate } else { 6847c478bd9Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 6857c478bd9Sstevel@tonic-gate } 6867c478bd9Sstevel@tonic-gate } 687