/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/task.h>
#include <sys/cmn_err.h>
#include <sys/class.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/clock_tick.h>
#include <sys/clock_impl.h>
#include <sys/sysmacros.h>
#include <vm/rm.h>

/*
 * This file contains the implementation of clock tick accounting for threads.
 * Every tick, user threads running on various CPUs are located and charged
 * with a tick to account for their use of CPU time.
 *
 * Every tick, the clock() handler calls clock_tick_schedule() to perform tick
 * accounting for all the threads in the system. Tick accounting is done in
 * two phases:
 *
 * Tick scheduling	Done in clock_tick_schedule(). In this phase, cross
 *			calls are scheduled to multiple CPUs to perform
 *			multi-threaded tick accounting. The CPUs are chosen
 *			on a rotational basis so as to distribute the tick
 *			accounting load evenly across all CPUs.
 *
 * Tick execution	Done in clock_tick_execute(). In this phase, tick
 *			accounting is actually performed by softint handlers
 *			on multiple CPUs.
 *
 * This implementation gives us a multi-threaded tick processing facility that
 * is suitable for configurations with a large number of CPUs. On smaller
 * configurations it may be desirable to let the processing be single-threaded
 * and just allow clock() to do it as it has been done traditionally.
 * To facilitate this, a variable, clock_tick_threshold, is defined. Platforms
 * that desire multi-threading should set this variable to something
 * appropriate. A recommended value may be found in clock_tick.h. At boot time,
 * if the number of CPUs is greater than clock_tick_threshold, multi-threading
 * kicks in. Note that this is a decision made at boot time. If more CPUs
 * are dynamically added later on to exceed the threshold, no attempt is made
 * to switch to multi-threaded. Similarly, if CPUs are removed dynamically,
 * no attempt is made to switch to single-threaded. This is to keep the
 * implementation simple. Also note that the threshold can be changed for a
 * specific customer configuration via /etc/system.
 *
 * The boot time decision is reflected in clock_tick_single_threaded.
 */

/*
 * clock_tick_threshold
 *	If the number of CPUs at boot time exceeds this threshold,
 *	multi-threaded tick accounting kicks in.
 *
 * clock_tick_ncpus
 *	The number of CPUs in a set. Each set is scheduled for tick execution
 *	on a separate processor.
 *
 * clock_tick_single_threaded
 *	Indicates whether or not tick accounting is single threaded.
 *
 * clock_tick_total_cpus
 *	Total number of online CPUs.
 *
 * clock_tick_cpus
 *	Array of online CPU pointers.
 *
 * clock_tick_cpu
 *	Per-CPU, cache-aligned data structures to facilitate multi-threading.
 *
 * clock_tick_active
 *	Counter that indicates the number of active tick processing softints
 *	in the system.
 *
 * clock_tick_pending
 *	Number of pending ticks that need to be accounted by the softint
 *	handlers.
 *
 * clock_tick_lock
 *	Mutex to synchronize between clock_tick_schedule() and
 *	CPU online/offline.
 *
 * clock_cpu_id
 *	CPU id of the clock() CPU. Used to detect when the clock CPU
 *	is offlined.
 *
 * clock_tick_online_cpuset
 *	CPU set of all online processors that can be X-called.
 *
 * clock_tick_proc_max
 *	Each process is allowed to accumulate a few ticks before checking
 *	for the task CPU time resource limit. We lower the number of calls
 *	to rctl_test() to make tick accounting more scalable. The tradeoff
 *	is that the limit may not get enforced in a timely manner. This is
 *	typically not a problem.
 *
 * clock_tick_set
 *	Per-set structures. Each structure contains the range of CPUs
 *	to be processed for the set.
 *
 * clock_tick_nsets
 *	Number of sets.
 *
 * clock_tick_scan
 *	Where to begin the scan for single-threaded mode. In multi-threaded
 *	mode, the clock_tick_set itself contains a field for this.
 */
int			clock_tick_threshold;
int			clock_tick_ncpus;
int			clock_tick_single_threaded;
int			clock_tick_total_cpus;
cpu_t			*clock_tick_cpus[NCPU];
clock_tick_cpu_t	*clock_tick_cpu[NCPU];
ulong_t			clock_tick_active;
int			clock_tick_pending;
kmutex_t		clock_tick_lock;
processorid_t		clock_cpu_id;
cpuset_t		clock_tick_online_cpuset;
clock_t			clock_tick_proc_max;
clock_tick_set_t	*clock_tick_set;
int			clock_tick_nsets;
int			clock_tick_scan;
ulong_t			clock_tick_intr;

static uint_t	clock_tick_execute(caddr_t, caddr_t);
static void	clock_tick_execute_common(int, int, int, clock_t, int);

#define	CLOCK_TICK_ALIGN	64	/* cache alignment */

/*
 * Clock tick initialization is done in two phases:
 *
 * 1. Before clock_init() is called, clock_tick_init_pre() is called to set
 *    up single-threading so that clock() can begin to do its job.
 *
 * 2. After the slave CPUs are initialized at boot time, we know the number
 *    of CPUs. clock_tick_init_post() is called to set up multi-threading if
 *    required.
 */
void
clock_tick_init_pre(void)
{
	clock_tick_cpu_t	*ctp;
	int			i, n;
	clock_tick_set_t	*csp;
	uintptr_t		buf;
	size_t			size;

	clock_tick_single_threaded = 1;

	/*
	 * Over-allocate by CLOCK_TICK_ALIGN bytes so that the base of the
	 * per-CPU structures can be rounded up to a cache-aligned boundary.
	 */
	size = P2ROUNDUP(sizeof (clock_tick_cpu_t), CLOCK_TICK_ALIGN);
	buf = (uintptr_t)kmem_zalloc(size * NCPU + CLOCK_TICK_ALIGN, KM_SLEEP);
	buf = P2ROUNDUP(buf, CLOCK_TICK_ALIGN);

	/*
	 * Perform initialization in case multi-threading is chosen later.
	 */
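	/*
	 * create_softint() and invoke_softint() are optional interfaces
	 * supplied by the platform (see clock_tick_init_post()). Taking a
	 * function's address and comparing it against NULL is the usual
	 * weak-symbol test for whether the platform provides it.
	 */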
	if (&create_softint != NULL) {
		clock_tick_intr = create_softint(LOCK_LEVEL,
		    clock_tick_execute, (caddr_t)NULL);
	}
	for (i = 0; i < NCPU; i++, buf += size) {
		ctp = (clock_tick_cpu_t *)buf;
		clock_tick_cpu[i] = ctp;
		mutex_init(&ctp->ct_lock, NULL, MUTEX_DEFAULT, NULL);
		if (&create_softint != NULL) {
			ctp->ct_intr = clock_tick_intr;
		}
		ctp->ct_pending = 0;
	}

	mutex_init(&clock_tick_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Compute clock_tick_ncpus here. We need it to compute the
	 * maximum number of tick sets we need to support.
	 */
	ASSERT(clock_tick_ncpus >= 0);
	if (clock_tick_ncpus == 0)
		clock_tick_ncpus = CLOCK_TICK_NCPUS;
	if (clock_tick_ncpus > max_ncpus)
		clock_tick_ncpus = max_ncpus;

	/*
	 * Allocate and initialize the tick sets.
	 */
	n = (max_ncpus + clock_tick_ncpus - 1) / clock_tick_ncpus;
	clock_tick_set = kmem_zalloc(sizeof (clock_tick_set_t) * n, KM_SLEEP);
	for (i = 0; i < n; i++) {
		csp = &clock_tick_set[i];
		csp->ct_start = i * clock_tick_ncpus;
		csp->ct_scan = csp->ct_start;
		csp->ct_end = csp->ct_start;
	}
}
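
/*
 * An illustrative example of the tick set layout computed above: with
 * max_ncpus = 64 and clock_tick_ncpus = 16, n = (64 + 16 - 1) / 16 = 4
 * sets are allocated, with ct_start values 0, 16, 32 and 48. Each set's
 * ct_end starts out equal to its ct_start and is advanced as CPUs come
 * online in clock_tick_cpu_setup().
 */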

void
clock_tick_init_post(void)
{
	/*
	 * If a platform does not provide create_softint() and
	 * invoke_softint(), then we assume single-threaded operation.
	 */
	if (&invoke_softint == NULL)
		clock_tick_threshold = 0;

	ASSERT(clock_tick_threshold >= 0);

	if (clock_tick_threshold == 0)
		clock_tick_threshold = max_ncpus;

	/*
	 * If a platform does not specify a threshold or if the number of CPUs
	 * at boot time does not exceed the threshold, tick accounting remains
	 * single-threaded.
	 */
	if (ncpus <= clock_tick_threshold) {
		clock_tick_ncpus = max_ncpus;
		clock_tick_proc_max = 1;
		return;
	}

	/*
	 * OK. Multi-thread tick processing. If a platform has not specified
	 * the CPU set size for multi-threading, then use the default value.
	 * This value was arrived at through measurements on large
	 * configuration systems.
	 */
	clock_tick_single_threaded = 0;
	if (clock_tick_proc_max == 0) {
		clock_tick_proc_max = CLOCK_TICK_PROC_MAX;
		if (hires_tick)
			clock_tick_proc_max *= 10;
	}
}
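
/*
 * Hand one tick set to CPU 'cid' for processing. The set boundaries and
 * the pending tick count are passed to the target CPU's clock_tick_cpu
 * structure under ct_lock, and then the softint is posted. The caller does
 * not wait for the handler; completion is tracked through the
 * clock_tick_active counter.
 */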
static void
clock_tick_schedule_one(clock_tick_set_t *csp, int pending, processorid_t cid)
{
	clock_tick_cpu_t	*ctp;

	ASSERT(&invoke_softint != NULL);

	atomic_inc_ulong(&clock_tick_active);

	/*
	 * Schedule tick accounting for a set of CPUs.
	 */
	ctp = clock_tick_cpu[cid];
	mutex_enter(&ctp->ct_lock);
	ctp->ct_lbolt = LBOLT_NO_ACCOUNT;
	ctp->ct_pending += pending;
	ctp->ct_start = csp->ct_start;
	ctp->ct_end = csp->ct_end;
	ctp->ct_scan = csp->ct_scan;
	mutex_exit(&ctp->ct_lock);

	invoke_softint(cid, ctp->ct_intr);
	/*
	 * Return without waiting for the softint to finish.
	 */
}
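
/*
 * Perform tick accounting for the thread running on CPU 'cp'. 'mylbolt' is
 * the lbolt value captured at scheduling time and 'pending' is the number
 * of ticks to charge. The function returns without doing anything if the
 * CPU has gone offline or the thread is exiting; see the locking comment
 * below.
 */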
static void
clock_tick_process(cpu_t *cp, clock_t mylbolt, int pending)
{
	kthread_t	*t;
	kmutex_t	*plockp;
	int		notick, intr;
	klwp_id_t	lwp;

	/*
	 * The locking here is rather tricky. thread_free_prevent()
	 * prevents the thread returned from being freed while we
	 * are looking at it. We can then check if the thread
	 * is exiting and get the appropriate p_lock if it
	 * is not. We have to be careful, though, because
	 * the _process_ can still be freed while we've
	 * prevented thread free. To avoid touching the
	 * proc structure we put a pointer to the p_lock in the
	 * thread structure. The p_lock is persistent so we
	 * can acquire it even if the process is gone. At that
	 * point we can check (again) if the thread is exiting
	 * and either drop the lock or do the tick processing.
	 */
	t = cp->cpu_thread;	/* Current running thread */
	if (CPU == cp) {
		/*
		 * 't' will be the tick processing thread on this
		 * CPU. Use the pinned thread (if any) on this CPU
		 * as the target of the clock tick.
		 */
		if (t->t_intr != NULL)
			t = t->t_intr;
	}

	/*
	 * We use thread_free_prevent to keep the currently running
	 * thread from being freed or recycled while we're
	 * looking at it.
	 */
	thread_free_prevent(t);
	/*
	 * We cannot hold the cpu_lock to prevent the
	 * cpu_active from changing in the clock interrupt.
	 * As long as we don't block (or don't get pre-empted)
	 * the cpu_list will not change (all threads are paused
	 * before list modification).
	 */
	if (CLOCK_TICK_CPU_OFFLINE(cp)) {
		thread_free_allow(t);
		return;
	}

	/*
	 * Make sure the thread is still on the CPU.
	 */
	if ((t != cp->cpu_thread) &&
	    ((cp != CPU) || (t != cp->cpu_thread->t_intr))) {
		/*
		 * We could not locate the thread. Skip this CPU. Race
		 * conditions while performing these checks are benign.
		 * These checks are not perfect and they don't need
		 * to be.
		 */
		thread_free_allow(t);
		return;
	}

	intr = t->t_flag & T_INTR_THREAD;
	lwp = ttolwp(t);
	if (lwp == NULL || (t->t_proc_flag & TP_LWPEXIT) || intr) {
		/*
		 * Thread is exiting (or uninteresting) so don't
		 * do tick processing.
		 */
		thread_free_allow(t);
		return;
	}

	/*
	 * OK, try to grab the process lock. See
	 * comments above for why we're not using
	 * ttoproc(t)->p_lockp here.
	 */
	plockp = t->t_plockp;
	mutex_enter(plockp);
	/* See above comment. */
	if (CLOCK_TICK_CPU_OFFLINE(cp)) {
		mutex_exit(plockp);
		thread_free_allow(t);
		return;
	}

	/*
	 * The thread may have exited between when we
	 * checked above, and when we got the p_lock.
	 */
	if (t->t_proc_flag & TP_LWPEXIT) {
		mutex_exit(plockp);
		thread_free_allow(t);
		return;
	}

	/*
	 * Either we have the p_lock for the thread's process,
	 * or we don't care about the thread structure any more.
	 * Either way we can allow thread free.
	 */
	thread_free_allow(t);

	/*
	 * If we haven't done tick processing for this
	 * lwp, then do it now. Since we don't hold the
	 * lwp down on a CPU it can migrate and show up
	 * more than once, hence the lbolt check. mylbolt
	 * is copied at the time of tick scheduling to prevent
	 * lbolt mismatches.
	 *
	 * Also, make sure that it's okay to perform the
	 * tick processing before calling clock_tick.
	 * Setting notick to a TRUE value (i.e. not 0)
	 * results in tick processing not being performed for
	 * that thread.
	 */
	notick = ((cp->cpu_flags & CPU_QUIESCED) || CPU_ON_INTR(cp) ||
	    (cp->cpu_dispthread == cp->cpu_idle_thread));

	if ((!notick) && (t->t_lbolt < mylbolt)) {
		t->t_lbolt = mylbolt;
		clock_tick(t, pending);
	}

	mutex_exit(plockp);
}
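
/*
 * Called from the clock() handler once per tick. In single-threaded mode,
 * all online CPUs are scanned directly from here. In multi-threaded mode,
 * one softint per tick set is scheduled, unless the previous round of
 * handlers is still active, in which case this tick is only added to the
 * pending count for the next round to pick up.
 */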
void
clock_tick_schedule(int one_sec)
{
	ulong_t			active;
	int			i, end;
	clock_tick_set_t	*csp;
	cpu_t			*cp;

	if (clock_cpu_id != CPU->cpu_id)
		clock_cpu_id = CPU->cpu_id;

	if (clock_tick_single_threaded) {
		/*
		 * Each tick cycle, start the scan from a different
		 * CPU for the sake of fairness.
		 */
		end = clock_tick_total_cpus;
		clock_tick_scan++;
		if (clock_tick_scan >= end)
			clock_tick_scan = 0;

		clock_tick_execute_common(0, clock_tick_scan, end,
		    LBOLT_NO_ACCOUNT, 1);

		return;
	}

	/*
	 * If the previous invocation of handlers is not yet finished, then
	 * simply increment a pending count and return. Eventually when they
	 * finish, the pending count is passed down to the next set of
	 * handlers to process. This way, ticks that have already elapsed
	 * in the past are handled as quickly as possible to minimize the
	 * chances of threads getting away before their pending ticks are
	 * accounted. The other benefit is that if the pending count is
	 * more than one, it can be handled by a single invocation of
	 * clock_tick(). This is a good optimization for busy systems with
	 * large configurations where tick accounting can get backed up for
	 * various reasons.
	 */
	clock_tick_pending++;

	/*
	 * Read clock_tick_active atomically; a compare-and-swap with
	 * identical old and new values does not modify the counter, it
	 * just returns its current value.
	 */
	active = clock_tick_active;
	active = atomic_cas_ulong(&clock_tick_active, active, active);
	if (active)
		return;

	/*
	 * We want to handle the clock CPU here. If we
	 * scheduled the accounting for the clock CPU to another
	 * processor, that processor will find only the clock() thread
	 * running and not account for any user thread below it. Also,
	 * we want to handle this before we block on anything and allow
	 * the pinned thread below the current thread to escape.
	 */
	clock_tick_process(CPU, LBOLT_NO_ACCOUNT, clock_tick_pending);

	mutex_enter(&clock_tick_lock);

	/*
	 * Schedule each set on a separate processor.
	 */
	cp = clock_cpu_list;
	for (i = 0; i < clock_tick_nsets; i++) {
		csp = &clock_tick_set[i];

		/*
		 * Pick the next online CPU in the list for scheduling tick
		 * accounting. The clock_tick_lock is held here, so CPU
		 * online/offline cannot muck with this while we are
		 * picking our CPU to X-call.
		 */
		if (cp == CPU)
			cp = cp->cpu_next_onln;

		/*
		 * Each tick cycle, start the scan from a different
		 * CPU for the sake of fairness.
		 */
		csp->ct_scan++;
		if (csp->ct_scan >= csp->ct_end)
			csp->ct_scan = csp->ct_start;

		clock_tick_schedule_one(csp, clock_tick_pending, cp->cpu_id);

		cp = cp->cpu_next_onln;
	}

	if (one_sec) {
		/*
		 * Move the CPU pointer around every second. This is so
		 * all the CPUs can be X-called in a round-robin fashion
		 * to evenly distribute the X-calls. We don't do this
		 * at a faster rate than this because we don't want
		 * to affect cache performance negatively.
		 */
		clock_cpu_list = clock_cpu_list->cpu_next_onln;
	}

	mutex_exit(&clock_tick_lock);

	clock_tick_pending = 0;
}
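
/*
 * Walk the CPUs in [scan, end) and then wrap around to [start, scan),
 * performing tick accounting for the thread running on each one. Starting
 * the walk at 'scan' rather than 'start' rotates which CPUs get processed
 * first, for fairness.
 */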
static void
clock_tick_execute_common(int start, int scan, int end, clock_t mylbolt,
    int pending)
{
	cpu_t	*cp;
	int	i;

	ASSERT((start <= scan) && (scan <= end));

	/*
	 * Handle the thread on the current CPU first. This is to prevent a
	 * pinned thread from escaping if we ever block on something.
	 * Note that in the single-threaded mode, this handles the clock
	 * CPU.
	 */
	clock_tick_process(CPU, mylbolt, pending);

	/*
	 * Perform tick accounting for the threads running on
	 * the scheduled CPUs.
	 */
	for (i = scan; i < end; i++) {
		cp = clock_tick_cpus[i];
		if ((cp == NULL) || (cp == CPU) || (cp->cpu_id == clock_cpu_id))
			continue;
		clock_tick_process(cp, mylbolt, pending);
	}

	for (i = start; i < scan; i++) {
		cp = clock_tick_cpus[i];
		if ((cp == NULL) || (cp == CPU) || (cp->cpu_id == clock_cpu_id))
			continue;
		clock_tick_process(cp, mylbolt, pending);
	}
}
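
/*
 * Softint handler for the tick execution phase. It consumes the work that
 * clock_tick_schedule_one() deposited in this CPU's clock_tick_cpu
 * structure and decrements clock_tick_active on the way out so that
 * clock_tick_schedule() can tell when the round is complete.
 */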
/*ARGSUSED*/
static uint_t
clock_tick_execute(caddr_t arg1, caddr_t arg2)
{
	clock_tick_cpu_t	*ctp;
	int			start, scan, end, pending;
	clock_t			mylbolt;

	/*
	 * We could have raced with cpu offline. We don't want to
	 * process anything on an offlined CPU. If we got blocked
	 * on anything, we may not get scheduled when we wake up
	 * later on.
	 */
	if (!CLOCK_TICK_XCALL_SAFE(CPU))
		goto out;

	ctp = clock_tick_cpu[CPU->cpu_id];

	mutex_enter(&ctp->ct_lock);
	pending = ctp->ct_pending;
	if (pending == 0) {
		/*
		 * If a CPU is busy at LOCK_LEVEL, then an invocation
		 * of this softint may be queued for some time. In that case,
		 * clock_tick_active will not be incremented.
		 * clock_tick_schedule() will then assume that the previous
		 * invocation is done and post a new softint. The first one
		 * that gets in will reset the pending count so the
		 * second one is a noop.
		 */
		mutex_exit(&ctp->ct_lock);
		goto out;
	}
	ctp->ct_pending = 0;
	start = ctp->ct_start;
	end = ctp->ct_end;
	scan = ctp->ct_scan;
	mylbolt = ctp->ct_lbolt;
	mutex_exit(&ctp->ct_lock);

	clock_tick_execute_common(start, scan, end, mylbolt, pending);

out:
	/*
	 * Signal completion to the clock handler.
	 */
	atomic_dec_ulong(&clock_tick_active);

	return (1);
}

/*ARGSUSED*/
static int
clock_tick_cpu_setup(cpu_setup_t what, int cid, void *arg)
{
	cpu_t			*cp, *ncp;
	int			i, set;
	clock_tick_set_t	*csp;

	/*
	 * This function performs some computations at CPU offline/online
	 * time. The computed values are used during tick scheduling and
	 * execution phases. This avoids having to compute things on
	 * an every-tick basis. The other benefit is that we perform the
	 * computations only for onlined CPUs (not offlined ones). As a
	 * result, no tick processing is attempted for offlined CPUs.
	 *
	 * Also, cpu_offline() calls this function before checking for
	 * active interrupt threads. This allows us to avoid posting
	 * cross calls to CPUs that are being offlined.
	 */

	cp = cpu[cid];

	mutex_enter(&clock_tick_lock);

	switch (what) {
	case CPU_ON:
		clock_tick_cpus[clock_tick_total_cpus] = cp;
		set = clock_tick_total_cpus / clock_tick_ncpus;
		csp = &clock_tick_set[set];
		csp->ct_end++;
		clock_tick_total_cpus++;
		clock_tick_nsets =
		    (clock_tick_total_cpus + clock_tick_ncpus - 1) /
		    clock_tick_ncpus;
		CPUSET_ADD(clock_tick_online_cpuset, cp->cpu_id);
		membar_sync();
		break;

	case CPU_OFF:
		if (&sync_softint != NULL)
			sync_softint(clock_tick_online_cpuset);
		CPUSET_DEL(clock_tick_online_cpuset, cp->cpu_id);
		clock_tick_total_cpus--;
		clock_tick_cpus[clock_tick_total_cpus] = NULL;
		clock_tick_nsets =
		    (clock_tick_total_cpus + clock_tick_ncpus - 1) /
		    clock_tick_ncpus;
		set = clock_tick_total_cpus / clock_tick_ncpus;
		csp = &clock_tick_set[set];
		csp->ct_end--;

		/*
		 * Rebuild the clock_tick_cpus[] array without the departing
		 * CPU, compacting the remaining online CPUs to the front.
		 */
		i = 0;
		ncp = cpu_active;
		do {
			if (cp == ncp)
				continue;
			clock_tick_cpus[i] = ncp;
			i++;
		} while ((ncp = ncp->cpu_next_onln) != cpu_active);
		ASSERT(i == clock_tick_total_cpus);
		membar_sync();
		break;

	default:
		break;
	}

	mutex_exit(&clock_tick_lock);

	return (0);
}

/*
 * Run the CPU_ON computations for every CPU that is already active and
 * register clock_tick_cpu_setup() for subsequent online/offline events.
 */
void
clock_tick_mp_init(void)
{
	cpu_t	*cp;

	mutex_enter(&cpu_lock);

	cp = cpu_active;
	do {
		(void) clock_tick_cpu_setup(CPU_ON, cp->cpu_id, NULL);
	} while ((cp = cp->cpu_next_onln) != cpu_active);

	register_cpu_setup_func(clock_tick_cpu_setup, NULL);

	mutex_exit(&cpu_lock);
}