/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/disp.h>
#include <sys/var.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/x86_archext.h>
#include <sys/archsystm.h>
#include <sys/cpuvar.h>
#include <sys/psm_defs.h>
#include <sys/clock.h>
#include <sys/atomic.h>
#include <sys/lockstat.h>
#include <sys/smp_impldefs.h>
#include <sys/dtrace.h>
#include <sys/time.h>

/*
 * Using the Pentium's TSC register for gethrtime()
 * ------------------------------------------------
 *
 * The Pentium family, like many chip architectures, has a high-resolution
 * timestamp counter ("TSC") which increments once per CPU cycle.  The
 * contents of the timestamp counter are read with the RDTSC instruction.
 *
 * As with its UltraSPARC equivalent (the %tick register), TSC's cycle count
 * must be translated into nanoseconds in order to implement gethrtime().
 * We avoid inducing floating point operations in this conversion by
 * implementing the same nsec_scale algorithm as that found in the sun4u
 * platform code.  The sun4u NATIVE_TIME_TO_NSEC_SCALE block comment contains
 * a detailed description of the algorithm; the comment is not reproduced
 * here.  This implementation differs only in its value for NSEC_SHIFT:
 * we implement an NSEC_SHIFT of 5 (instead of sun4u's 4) to allow for
 * 60 MHz Pentiums.
 *
 * While TSC and %tick are both cycle counting registers, TSC's functionality
 * falls short in several critical ways:
 *
 *  (a)	TSCs on different CPUs are not guaranteed to be in sync.  While in
 *	practice they often _are_ in sync, this isn't guaranteed by the
 *	architecture.
 *
 *  (b)	The TSC cannot be reliably set to an arbitrary value.  The
 *	architecture only supports writing the low 32 bits of TSC, making it
 *	impractical to rewrite.
 *
 *  (c)	The architecture doesn't have the capacity to interrupt based on
 *	arbitrary values of TSC; there is no TICK_CMPR equivalent.
 *
 * Together, (a) and (b) imply that software must track the skew between
 * TSCs and account for it (it is assumed that while there may exist skew,
 * there does not exist drift).  To determine the skew between CPUs, we
 * have newly onlined CPUs call tsc_sync_slave(), while the CPU performing
 * the online operation calls tsc_sync_master().  Once both CPUs are ready,
 * the master sets a shared flag, and each reads its TSC register.  To reduce
 * bias, we then wait until both CPUs are ready again, but this time the
 * slave sets the shared flag, and each reads its TSC register again.  The
 * master compares the average of the two sample values, and, if observable
 * skew is found, changes the gethrtimef function pointer to point to a
 * gethrtime() implementation which will take the discovered skew into
 * consideration.
 *
 * In the absence of time-of-day clock adjustments, gethrtime() must stay in
 * sync with gettimeofday().  This is problematic; given (c), the software
 * cannot drive its time-of-day source from TSC, and yet they must somehow be
 * kept in sync.  We implement this by having a routine, tsc_tick(), which
 * is called once per second from the interrupt which drives time-of-day.
 * tsc_tick() recalculates nsec_scale based on the number of CPU cycles since
 * boot versus the number of seconds since boot.  This algorithm becomes more
 * accurate over time and converges quickly; the error in nsec_scale is
 * typically under 1 ppm less than 10 seconds after boot, and is less than
 * 100 ppb 1 minute after boot.
 *
 * Note that the hrtime base for gethrtime, tsc_hrtime_base, is modified
 * atomically with nsec_scale under CLOCK_LOCK.  This assures that time
 * monotonically increases.
 */

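/*
 * For illustration (this worked example is not from the original source):
 * with NSEC_SHIFT of 5, tsc_hrtimeinit() computes
 *
 *	nsec_scale = (NANOSEC << (32 - NSEC_SHIFT)) / cpu_freq_hz
 *
 * so a 1 GHz CPU gets nsec_scale = (10^9 << 27) / 10^9 = 2^27, and the
 * conversion
 *
 *	ns = (tsc * nsec_scale) >> (32 - NSEC_SHIFT) = (tsc << 27) >> 27
 *
 * yields one nanosecond per cycle, as expected.  An NSEC_SHIFT of 5 keeps
 * nsec_scale below 2^32 for any CPU faster than NANOSEC / 2^5 = 31.25 MHz
 * (see the ASSERT in tsc_hrtimeinit()).
 */
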
#define	NSEC_SHIFT 5

static uint_t nsec_scale;

/*
 * These two variables used to be grouped together inside of a structure that
 * lived on a single cache line.  A regression (bug ID 4623398) caused the
 * compiler to emit code that "optimized" away the while-loops below.  The
 * result was that no synchronization between the onlining and onlined CPUs
 * took place.
 */
static volatile int tsc_ready;
static volatile int tsc_sync_go;

/*
 * Used as indices into the tsc_sync_snaps[] array.
 */
#define	TSC_MASTER		0
#define	TSC_SLAVE		1

/*
 * Used in the tsc_sync_master()/tsc_sync_slave() rendezvous.
 */
#define	TSC_SYNC_STOP		1
#define	TSC_SYNC_GO		2
#define	TSC_SYNC_AGAIN		3

/*
 * XX64	Is there a faster way to do this with a 64-bit ABI?
 */
#define	TSC_CONVERT_AND_ADD(tsc, hrt, scale) {			\
	unsigned int *_l = (unsigned int *)&(tsc);		\
	(hrt) += mul32(_l[1], scale) << NSEC_SHIFT;		\
	(hrt) += mul32(_l[0], scale) >> (32 - NSEC_SHIFT);	\
}

#define	TSC_CONVERT(tsc, hrt, scale) {				\
	unsigned int *_l = (unsigned int *)&(tsc);		\
	(hrt) = mul32(_l[1], scale) << NSEC_SHIFT;		\
	(hrt) += mul32(_l[0], scale) >> (32 - NSEC_SHIFT);	\
}

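/*
 * A sketch of why the split multiply works (this note is not from the
 * original source): writing the 64-bit cycle count as tsc = hi * 2^32 + lo,
 * where _l[1] = hi and _l[0] = lo on a little-endian machine,
 *
 *	(tsc * scale) >> (32 - NSEC_SHIFT)
 *	    = ((hi * scale) << 32 + (lo * scale)) >> (32 - NSEC_SHIFT)
 *	    = ((hi * scale) << NSEC_SHIFT)
 *	    + ((lo * scale) >> (32 - NSEC_SHIFT))
 *
 * Each term is a 32x32->64-bit multiply (mul32), so no 128-bit
 * intermediate result is needed.
 */
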
static int	tsc_max_delta;
static hrtime_t	tsc_sync_snaps[2];
static hrtime_t	tsc_sync_delta[NCPU];
static hrtime_t	tsc_sync_tick_delta[NCPU];
static hrtime_t	tsc_last = 0;
static hrtime_t	tsc_last_jumped = 0;
static hrtime_t	tsc_hrtime_base = 0;
static int	tsc_jumped = 0;

static hrtime_t	shadow_tsc_hrtime_base;
static hrtime_t	shadow_tsc_last;
static uint_t	shadow_nsec_scale;
static uint32_t	shadow_hres_lock;

/*
 * Called by the master after the sync operation is complete.  If the
 * slave is discovered to lag, gethrtimef will be changed to point to
 * tsc_gethrtime_delta().
 */
static void
tsc_digest(processorid_t target)
{
	hrtime_t tdelta, hdelta = 0;
	int max = tsc_max_delta;
	processorid_t source = CPU->cpu_id;
	int update;

	update = tsc_sync_delta[source] != 0 ||
	    gethrtimef == tsc_gethrtime_delta;

	/*
	 * We divide by 2 since each of the data points is the sum of two TSC
	 * reads; this takes the average of the two.
	 */
	tdelta = (tsc_sync_snaps[TSC_SLAVE] - tsc_sync_snaps[TSC_MASTER]) / 2;
	if ((tdelta > max) || ((tdelta >= 0) && update)) {
		TSC_CONVERT_AND_ADD(tdelta, hdelta, nsec_scale);
		tsc_sync_delta[target] = tsc_sync_delta[source] - hdelta;
		tsc_sync_tick_delta[target] = -tdelta;
		gethrtimef = tsc_gethrtime_delta;
		gethrtimeunscaledf = tsc_gethrtimeunscaled_delta;
		return;
	}

	tdelta = -tdelta;
	if ((tdelta > max) || update) {
		TSC_CONVERT_AND_ADD(tdelta, hdelta, nsec_scale);
		tsc_sync_delta[target] = tsc_sync_delta[source] + hdelta;
		tsc_sync_tick_delta[target] = tdelta;
		gethrtimef = tsc_gethrtime_delta;
		gethrtimeunscaledf = tsc_gethrtimeunscaled_delta;
	}
}

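/*
 * For illustration (this derivation is not from the original source): if
 * the master's two reads are M1 and M2 and the slave's are S1 and S2, the
 * snaps hold M1 + M2 and S1 + S2, so
 *
 *	tdelta = ((S1 + S2) - (M1 + M2)) / 2
 *	       = avg(S1, S2) - avg(M1, M2)
 *
 * Since the first rendezvous is triggered by the master and the second by
 * the slave, a fixed signalling latency appears once in each direction
 * and largely cancels out of the average.
 */
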
/*
 * Called by a CPU which has just performed an online operation on another
 * CPU.  It is expected that the newly onlined CPU will call tsc_sync_slave().
 */
void
tsc_sync_master(processorid_t slave)
{
	int flags;
	hrtime_t hrt;

	ASSERT(tsc_sync_go != TSC_SYNC_GO);

	flags = clear_int_flag();

	/*
	 * Wait for the slave CPU to arrive.
	 */
	while (tsc_ready != TSC_SYNC_GO)
		continue;

	/*
	 * Tell the slave CPU to begin reading its TSC; read our own.
	 */
	tsc_sync_go = TSC_SYNC_GO;
	hrt = tsc_read();

	/*
	 * Tell the slave that we're ready, and wait for the slave to tell us
	 * to read our TSC again.
	 */
	tsc_ready = TSC_SYNC_AGAIN;
	while (tsc_sync_go != TSC_SYNC_AGAIN)
		continue;

	hrt += tsc_read();
	tsc_sync_snaps[TSC_MASTER] = hrt;

	/*
	 * Wait for the slave to finish reading its TSC.
	 */
	while (tsc_ready != TSC_SYNC_STOP)
		continue;

	/*
	 * At this point, both CPUs have performed their tsc_read() calls.
	 * We'll digest it now before letting the slave CPU return.
	 */
	tsc_digest(slave);
	tsc_sync_go = TSC_SYNC_STOP;

	restore_int_flag(flags);
}

/*
 * Called by a CPU which has just been onlined.  It is expected that the CPU
 * performing the online operation will call tsc_sync_master().
 */
void
tsc_sync_slave(void)
{
	int flags;
	hrtime_t hrt;

	ASSERT(tsc_sync_go != TSC_SYNC_GO);

	flags = clear_int_flag();

	/*
	 * Tell the master CPU that we're ready, and wait for the master to
	 * tell us to begin reading our TSC.
	 */
	tsc_ready = TSC_SYNC_GO;
	while (tsc_sync_go != TSC_SYNC_GO)
		continue;

	hrt = tsc_read();

	/*
	 * Wait for the master CPU to be ready to read its TSC again.
	 */
	while (tsc_ready != TSC_SYNC_AGAIN)
		continue;

	/*
	 * Tell the master CPU to read its TSC again; read ours again.
	 */
	tsc_sync_go = TSC_SYNC_AGAIN;

	hrt += tsc_read();
	tsc_sync_snaps[TSC_SLAVE] = hrt;

	/*
	 * Tell the master that we're done, and wait to be dismissed.
	 */
	tsc_ready = TSC_SYNC_STOP;
	while (tsc_sync_go != TSC_SYNC_STOP)
		continue;

	restore_int_flag(flags);
}

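/*
 * A sketch of the rendezvous above (this summary is not from the original
 * source); time runs downward, and each side spins until the other side's
 * store is visible:
 *
 *	master                          slave
 *	------                          -----
 *	wait for tsc_ready == GO        tsc_ready = GO
 *	tsc_sync_go = GO                wait for tsc_sync_go == GO
 *	hrt = tsc_read()                hrt = tsc_read()
 *	tsc_ready = AGAIN               wait for tsc_ready == AGAIN
 *	wait for tsc_sync_go == AGAIN   tsc_sync_go = AGAIN
 *	hrt += tsc_read()               hrt += tsc_read()
 *	wait for tsc_ready == STOP      tsc_ready = STOP
 *	tsc_digest(slave)               wait for tsc_sync_go == STOP
 *	tsc_sync_go = STOP
 */
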
void
tsc_hrtimeinit(uint64_t cpu_freq_hz)
{
	longlong_t tsc;
	int flags;

	/*
	 * cpu_freq_hz is the measured cpu frequency in hertz.
	 */

	/*
	 * We can't accommodate CPUs slower than 31.25 MHz.
	 */
	ASSERT(cpu_freq_hz > NANOSEC / (1 << NSEC_SHIFT));
	nsec_scale =
	    (uint_t)(((uint64_t)NANOSEC << (32 - NSEC_SHIFT)) / cpu_freq_hz);

	flags = clear_int_flag();
	tsc = tsc_read();
	(void) tsc_gethrtime();
	tsc_max_delta = tsc_read() - tsc;
	restore_int_flag(flags);
}

/*
 * Called once per second on CPU 0 from the cyclic subsystem's CY_HIGH_LEVEL
 * interrupt.
 */
void
tsc_tick(void)
{
	hrtime_t now, delta;
	ushort_t spl;

	/*
	 * Before we set the new variables, we set the shadow values.  This
	 * allows for lock free operation in dtrace_gethrtime().
	 */
	lock_set_spl((lock_t *)&shadow_hres_lock + HRES_LOCK_OFFSET,
	    ipltospl(CBE_HIGH_PIL), &spl);

	shadow_tsc_hrtime_base = tsc_hrtime_base;
	shadow_tsc_last = tsc_last;
	shadow_nsec_scale = nsec_scale;

	shadow_hres_lock++;
	splx(spl);

	CLOCK_LOCK(&spl);

	now = tsc_read();

	if (now < tsc_last) {
		/*
		 * The TSC has just jumped into the past.  We assume that
		 * this is due to a suspend/resume cycle, and we're going
		 * to use the _current_ value of TSC as the delta.  This
		 * will keep tsc_hrtime_base correct.  We're also going to
		 * assume that the rate of the TSC does not change after a
		 * suspend/resume cycle (i.e., nsec_scale remains the same).
		 */
		delta = now;
		tsc_last_jumped += tsc_last;
		tsc_jumped = 1;
	} else {
		/*
		 * Determine the number of TSC ticks since the last clock
		 * tick, and add that to the hrtime base.
		 */
		delta = now - tsc_last;
	}

	TSC_CONVERT_AND_ADD(delta, tsc_hrtime_base, nsec_scale);
	tsc_last = now;

	CLOCK_UNLOCK(spl);
}

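/*
 * A note on the lock-free readers below (this summary is not from the
 * original source): hres_lock is managed so that its low bit is set while
 * CLOCK_LOCK is held, and its value advances with every lock/unlock cycle.
 * A reader samples hres_lock, reads the clock variables, and then checks
 *
 *	(old_hres_lock & ~1) != hres_lock
 *
 * The masked low bit makes the comparison fail if the lock was held at the
 * first sample, and the comparison also fails if the lock was taken and
 * dropped in between; either way the reader takes another lap, so it only
 * returns a value computed from a consistent snapshot.
 */
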
hrtime_t
tsc_gethrtime(void)
{
	uint32_t old_hres_lock;
	hrtime_t tsc, hrt;

	do {
		old_hres_lock = hres_lock;

		if ((tsc = tsc_read()) >= tsc_last) {
			/*
			 * It would seem to be obvious that this is true
			 * (that is, the past is less than the present),
			 * but it isn't true in the presence of suspend/resume
			 * cycles.  If we manage to call gethrtime()
			 * after a resume, but before the first call to
			 * tsc_tick(), we will see the jump.  In this case,
			 * we will simply use the value in TSC as the delta.
			 */
			tsc -= tsc_last;
		} else if (tsc >= tsc_last - 2*tsc_max_delta) {
			/*
			 * There is a chance that tsc_tick() has just run on
			 * another CPU, and we have drifted just enough so that
			 * we appear behind tsc_last.  In this case, force the
			 * delta to be zero.
			 */
			tsc = 0;
		}

		hrt = tsc_hrtime_base;

		TSC_CONVERT_AND_ADD(tsc, hrt, nsec_scale);
	} while ((old_hres_lock & ~1) != hres_lock);

	return (hrt);
}

/*
 * This is similar to the above, but it cannot actually spin on hres_lock.
 * As a result, it caches all of the variables it needs; if the variables
 * don't change, it's done.
 */
hrtime_t
dtrace_gethrtime(void)
{
	uint32_t old_hres_lock;
	hrtime_t tsc, hrt;

	do {
		old_hres_lock = hres_lock;

		/*
		 * See the comments in tsc_gethrtime(), above.
		 */
		if ((tsc = tsc_read()) >= tsc_last)
			tsc -= tsc_last;
		else if (tsc >= tsc_last - 2*tsc_max_delta)
			tsc = 0;

		hrt = tsc_hrtime_base;

		TSC_CONVERT_AND_ADD(tsc, hrt, nsec_scale);

		if ((old_hres_lock & ~1) == hres_lock)
			break;

		/*
		 * If we're here, the clock lock is locked -- or it has been
		 * unlocked and locked since we looked.  This may be due to
		 * tsc_tick() running on another CPU -- or it may be because
		 * some code path has ended up in dtrace_probe() with
		 * CLOCK_LOCK held.  We'll try to determine that we're in
		 * the former case by taking another lap if the lock has
		 * changed since when we first looked at it.
		 */
		if (old_hres_lock != hres_lock)
			continue;

		/*
		 * So the lock was and is locked.  We'll use the old data
		 * instead.
		 */
		old_hres_lock = shadow_hres_lock;

		/*
		 * See the comments in tsc_gethrtime(), above.
		 */
		if ((tsc = tsc_read()) >= shadow_tsc_last)
			tsc -= shadow_tsc_last;
		else if (tsc >= shadow_tsc_last - 2*tsc_max_delta)
			tsc = 0;

		hrt = shadow_tsc_hrtime_base;

		TSC_CONVERT_AND_ADD(tsc, hrt, shadow_nsec_scale);
	} while ((old_hres_lock & ~1) != shadow_hres_lock);

	return (hrt);
}

hrtime_t
tsc_gethrtime_delta(void)
{
	hrtime_t hrt;
	int flags;

	/*
	 * We need to disable interrupts here to assure that we don't migrate
	 * between the call to tsc_gethrtime() and adding the CPU's hrtime
	 * delta.  Note that disabling and reenabling preemption is forbidden
	 * here because we may be in the middle of a fast trap.  In the amd64
	 * kernel we cannot tolerate preemption during a fast trap.  See
	 * _update_sregs().
	 */

	flags = clear_int_flag();
	hrt = tsc_gethrtime() + tsc_sync_delta[CPU->cpu_id];
	restore_int_flag(flags);

	return (hrt);
}

extern uint64_t cpu_freq_hz;
extern int tsc_gethrtime_enable;

/*
 * The following converts nanoseconds of highres-time to ticks.
 */
static uint64_t
hrtime2tick(hrtime_t ts)
{
	hrtime_t q = ts / NANOSEC;
	hrtime_t r = ts - (q * NANOSEC);

	return (q * cpu_freq_hz + ((r * cpu_freq_hz) / NANOSEC));
}

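/*
 * For illustration (this note is not from the original source): computing
 * (ts * cpu_freq_hz) / NANOSEC directly would overflow 64 bits almost
 * immediately -- at 3 GHz, ts * cpu_freq_hz exceeds 2^63 once ts is a
 * little over three seconds of nanoseconds.  Splitting ts into whole
 * seconds (q) and a sub-second remainder (r < NANOSEC) keeps each partial
 * product in range: r * cpu_freq_hz stays below 2^63 for any CPU frequency
 * under roughly 9.2 GHz.
 */
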
/*
 * This is used to convert scaled high-res time from nanoseconds to
 * unscaled hardware ticks.  (Read from the hardware timestamp counter.)
 */
uint64_t
unscalehrtime(hrtime_t ts)
{
	if (tsc_gethrtime_enable) {
		uint64_t unscale = 0;
		hrtime_t rescale;
		hrtime_t diff = ts;

		while (diff > (nsec_per_tick)) {
			unscale += hrtime2tick(diff);
			rescale = unscale;
			scalehrtime(&rescale);
			diff = ts - rescale;
		}

		return (unscale);
	}
	return (0);
}

hrtime_t
tsc_gethrtimeunscaled(void)
{
	uint32_t old_hres_lock;
	hrtime_t tsc;

	do {
		old_hres_lock = hres_lock;

		if ((tsc = tsc_read()) < tsc_last) {
			/*
			 * See the comments in tsc_gethrtime(), above.
			 */
			tsc += tsc_last_jumped;
		}

	} while ((old_hres_lock & ~1) != hres_lock);

	return (tsc);
}

/*
 * Convert a tsc timestamp to nanoseconds.
 */
void
tsc_scalehrtime(hrtime_t *tsc)
{
	hrtime_t hrt;
	hrtime_t mytsc;

	if (tsc == NULL)
		return;
	mytsc = *tsc;

	TSC_CONVERT(mytsc, hrt, nsec_scale);
	*tsc = hrt;
}

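/*
 * A note on unscalehrtime() above (this summary is not from the original
 * source): nsec_scale is only an approximation of the true cycle rate and
 * hrtime2tick() truncates, so a single conversion may not land exactly on
 * ts.  The loop rescales its running estimate, measures the shortfall
 * (diff), and converts just that shortfall on the next pass, iterating
 * until the estimate is within one tick period (nsec_per_tick) of ts --
 * iterative refinement rather than a closed-form inverse.
 */
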
hrtime_t
tsc_gethrtimeunscaled_delta(void)
{
	hrtime_t hrt;
	int flags;

	/*
	 * Similarly to tsc_gethrtime_delta, we need to disable interrupts
	 * here to prevent migration between the call to
	 * tsc_gethrtimeunscaled() and adding the CPU's hrtime delta.  Note
	 * that disabling and reenabling preemption is forbidden here because
	 * we may be in the middle of a fast trap.  In the amd64 kernel we
	 * cannot tolerate preemption during a fast trap.  See
	 * _update_sregs().
	 */

	flags = clear_int_flag();
	hrt = tsc_gethrtimeunscaled() + tsc_sync_tick_delta[CPU->cpu_id];
	restore_int_flag(flags);

	return (hrt);
}