1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #ifndef _SYS_CLOCK_H 27 #define _SYS_CLOCK_H 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #ifdef __cplusplus 32 extern "C" { 33 #endif 34 35 #include <sys/spl.h> 36 #include <sys/time.h> 37 #include <sys/machclock.h> 38 39 #ifndef _ASM 40 41 #ifdef _KERNEL 42 43 extern void setcpudelay(void); 44 45 extern uint_t nsec_scale; 46 extern uint_t nsec_shift; 47 extern uint_t nsec_per_sys_tick; 48 extern uint64_t sys_tick_freq; 49 50 extern int traptrace_use_stick; 51 extern uint64_t system_clock_freq; 52 extern uint_t sys_clock_mhz; 53 54 extern void mon_clock_init(void); 55 extern void mon_clock_start(void); 56 extern void mon_clock_stop(void); 57 extern void mon_clock_share(void); 58 extern void mon_clock_unshare(void); 59 60 extern hrtime_t hrtime_base; 61 extern void hres_tick(void); 62 extern void clkstart(void); 63 extern void cbe_level14(); 64 extern hrtime_t tick2ns(hrtime_t, uint_t); 65 66 typedef struct { 67 uint64_t cbe_level1_inum; 68 uint64_t cbe_level10_inum; 69 } cbe_data_t; 70 71 #endif /* _KERNEL */ 
72 73 #endif /* _ASM */ 74 75 76 #define CBE_LOW_PIL 1 77 #define CBE_LOCK_PIL LOCK_LEVEL 78 #define CBE_HIGH_PIL 14 79 80 #define ADJ_SHIFT 4 /* used in get_hrestime and _level10 */ 81 82 /* 83 * Locking strategy for high-resolution timing services 84 * 85 * We generally construct timestamps from two or more components: 86 * a hardware time source and one or more software time sources. 87 * These components cannot all be loaded simultaneously, so we need 88 * some sort of locking strategy to generate consistent timestamps. 89 * 90 * To minimize lock contention and cache thrashing we employ the 91 * weakest possible synchronization model: writers (rare) serialize 92 * on an acquisition-counting mutex, described below; readers (common) 93 * execute in parallel with no synchronization at all -- they don't 94 * exclude other readers, and they don't even exclude writers. Instead, 95 * readers just examine the writer lock's value before and after loading 96 * all the components of a timestamp to detect writer intervention. 97 * In the rare case when a writer does intervene, the reader will 98 * detect it, discard the timestamp and try again. 99 * 100 * The writer lock, hres_lock, is a 32-bit integer consisting of an 101 * 8-bit lock and a 24-bit acquisition count. To acquire the lock we 102 * set the lock field with ldstub, which sets the low-order 8 bits to 103 * 0xff; to clear the lock, we increment it, which simultaneously clears 104 * the lock field (0xff --> 0x00) and increments the acquisition count 105 * (due to carry into bit 8). Thus each acquisition transforms hres_lock 106 * from N:0 to N:ff, and each release transforms N:ff into (N+1):0. 
 *
 * Readers can detect writer intervention by loading hres_lock before
 * and after loading the time components they need; if either lock value
 * contains 0xff in the low-order bits (lock held), or if the lock values
 * are not equal (lock was acquired and released), a writer intervened
 * and the reader must try again. If the lock values are equal and the
 * low-order 8 bits are clear, the timestamp must be valid. We can check
 * both of these conditions with a single compare instruction by checking
 * whether (old_hres_lock & ~1) == new_hres_lock, as illustrated by the
 * following table of all possible lock states:
 *
 *	initial & ~1	final		result of compare
 *	------------	-----		-----------------
 *	now:00		now:00		valid
 *	now:00		now:ff		invalid
 *	now:00		later:00	invalid
 *	now:00		later:ff	invalid
 *	now:fe		now:ff		invalid
 *	now:fe		later:00	invalid
 *	now:fe		later:ff	invalid
 *
 * Implementation considerations:
 *
 * (1) Load buffering.
 *
 * On a CPU that does load buffering we must ensure that the load of
 * hres_lock completes before the load of any timestamp components.
 * This is essential *even on a CPU that does in-order loads* because
 * accessing the hardware time source may not involve a memory reference
 * (e.g. rd %tick). A convenient way to address this is to clear the
 * lower bit (andn with 1) of the old lock value right away, since this
 * generates a dependency on the load of hres_lock. We have to do this
 * anyway to perform the lock comparison described above.
 *
 * (2) Out-of-order loads.
 *
 * On a CPU that does out-of-order loads we must ensure that the loads
 * of all timestamp components have completed before we load the final
 * value of hres_lock. This can be done either by generating load
 * dependencies on the timestamp components or by membar #LoadLoad.
147 * 148 * (3) Interaction with the high level cyclic handler, hres_tick(). 149 * 150 * One unusual property of hres_lock is that it's acquired in a high 151 * level cyclic handler, hres_tick(). Thus, hres_lock must be acquired at 152 * CBE_HIGH_PIL or higher to prevent single-CPU deadlock. 153 * 154 * (4) Cross-calls. 155 * 156 * If a cross-call happens while one CPU has hres_lock and another is 157 * trying to acquire it in the clock interrupt path, the system will 158 * deadlock: the first CPU will never release hres_lock since it's 159 * waiting to be released from the cross-call, and the cross-call can't 160 * complete because the second CPU is spinning on hres_lock with traps 161 * disabled. Thus cross-calls must be blocked while holding hres_lock. 162 * 163 * Together, (3) and (4) imply that hres_lock should only be acquired 164 * at PIL >= max(XCALL_PIL, CBE_HIGH_PIL), or while traps are disabled. 165 */ 166 #define HRES_LOCK_OFFSET 3 167 168 #define CLOCK_LOCK(oldsplp) \ 169 lock_set_spl((lock_t *)&hres_lock + HRES_LOCK_OFFSET, \ 170 ipltospl(CBE_HIGH_PIL), oldsplp) 171 172 #define CLOCK_UNLOCK(spl) \ 173 membar_ldst_stst(); \ 174 hres_lock++; \ 175 splx(spl); \ 176 LOCKSTAT_RECORD0(LS_CLOCK_UNLOCK_RELEASE, \ 177 (lock_t *)&hres_lock + HRES_LOCK_OFFSET); 178 179 /* 180 * NATIVE_TIME_TO_NSEC_SCALE is called with NSEC_SHIFT to convert hi-res 181 * timestamps into nanoseconds. On systems that have a %stick register, 182 * hi-res timestamps are in %stick units. On systems that do not have a 183 * %stick register, hi-res timestamps are in %tick units. 184 * 185 * NATIVE_TIME_TO_NSEC_SCALE is called with TICK_NSEC_SHIFT to convert from 186 * %tick units to nanoseconds on all implementations whether %stick is 187 * available or not. 188 */ 189 190 /* 191 * At least 62.5 MHz CPU %tick frequency 192 */ 193 194 #define TICK_NSEC_SHIFT 4 195 196 /* 197 * Convert hi-res native time (V9's %tick in our case) into nanoseconds. 
 *
 * The challenge is to multiply a %tick value by (NANOSEC / sys_tick_freq)
 * without using floating point and without overflowing 64-bit integers.
 * We assume that all sun4u systems will have a 16 nsec or better clock
 * (i.e. faster than 62.5 MHz), which means that (ticks << 4) has units
 * greater than one nanosecond, so converting from (ticks << 4) to nsec
 * requires multiplication by a rational number, R, between 0 and 1.
 * To avoid floating-point we precompute (R * 2^32) during boot and
 * stash this away in nsec_scale. Thus we can compute (tick * R) as
 * (tick * nsec_scale) >> 32, which is accurate to about 1 part per billion.
 *
 * To avoid 64-bit overflow when multiplying (tick << 4) by nsec_scale,
 * we split (tick << 4) into its high and low 32-bit pieces, H and L,
 * multiply each piece separately, and add up the relevant bits of the
 * partial products. Putting it all together we have:
 *
 *	nsec = (tick << 4) * R
 *	     = ((tick << 4) * nsec_scale) >> 32
 *	     = (((H << 32) + L) * nsec_scale) >> 32
 *	     = (H * nsec_scale) + ((L * nsec_scale) >> 32)
 *
 * The last line is the computation we actually perform: it requires no
 * floating point and all intermediate results fit in 64-bit registers.
 *
 * Note that we require that tick is less than (1 << (64 - NSEC_SHIFT));
 * greater values will result in overflow and misbehavior (not that this
 * is a serious problem; (1 << (64 - NSEC_SHIFT)) nanoseconds is over
 * thirty-six years). Nonetheless, clients may wish to be aware of this
 * limitation; NATIVE_TIME_MAX() returns this maximum native time.
 *
 * We provide two versions of this macro: a "full-service" version that
 * just converts ticks to nanoseconds and a higher-performance version that
 * expects the scaling factor nsec_scale as its second argument (so that
 * callers can distance the load of nsec_scale from its use).
Note that 232 * we take a fast path if we determine the ticks to be less than 32 bits 233 * (as it often is for the delta between %tick values for successive 234 * firings of the hres_tick() cyclic). 235 * 236 * Note that in the 32-bit path we don't even bother clearing NPT. 237 * We get away with this by making hardclk.c ensure than nsec_scale 238 * is even, so we can take advantage of the associativity of modular 239 * arithmetic: multiplying %tick by any even number, say 2*n, is 240 * equivalent to multiplying %tick by 2, then by n. Multiplication 241 * by 2 is equivalent to shifting left by one, which clears NPT. 242 * 243 * Finally, note that the macros use the labels "6:" and "7:"; these 244 * labels must not be used across an invocation of either macro. 245 */ 246 #define NATIVE_TIME_TO_NSEC_SCALE(out, scr1, scr2, shift) \ 247 srlx out, 32, scr2; /* check high 32 bits */ \ 248 /* CSTYLED */ \ 249 brz,a,pt scr2, 6f; /* if clear, 32-bit fast path */\ 250 mulx out, scr1, out; /* delay: 32-bit fast path */ \ 251 sllx out, shift, out; /* clear NPT and pre-scale */ \ 252 srlx out, 32, scr2; /* scr2 = hi32(tick<<4) = H */ \ 253 mulx scr2, scr1, scr2; /* scr2 = (H*F) */ \ 254 srl out, 0, out; /* out = lo32(tick<<4) = L */ \ 255 mulx out, scr1, scr1; /* scr1 = (L*F) */ \ 256 srlx scr1, 32, scr1; /* scr1 = (L*F) >> 32 */ \ 257 ba 7f; /* branch over 32-bit path */ \ 258 add scr1, scr2, out; /* out = (H*F) + ((L*F) >> 32) */\ 259 6: \ 260 srlx out, 32 - shift, out; \ 261 7: 262 263 #define NATIVE_TIME_TO_NSEC(out, scr1, scr2) \ 264 sethi %hi(nsec_scale), scr1; /* load scaling factor */ \ 265 ld [scr1 + %lo(nsec_scale)], scr1; \ 266 NATIVE_TIME_TO_NSEC_SCALE(out, scr1, scr2, NSEC_SHIFT); 267 268 #define NATIVE_TIME_MAX(out) \ 269 mov -1, out; \ 270 srlx out, NSEC_SHIFT, out 271 272 273 /* 274 * The following macros are only for use in the cpu module. 
275 */ 276 #if defined(CPU_MODULE) 277 278 /* 279 * NSEC_SHIFT and VTRACE_SHIFT constants are defined in 280 * <sys/machclock.h> file. 281 */ 282 283 284 /* 285 * NOTE: the macros below assume that the various time-related variables 286 * (hrestime, hrestime_adj, hres_last_tick, timedelta, nsec_scale, etc) 287 * are all stored together on a 64-byte boundary. The primary motivation 288 * is cache performance, but we also take advantage of a convenient side 289 * effect: these variables all have the same high 22 address bits, so only 290 * one sethi is needed to access them all. 291 */ 292 293 /* 294 * GET_HRESTIME() returns the value of hrestime, hrestime_adj and the 295 * number of nanoseconds since the last clock tick ('nslt'). It also 296 * sets 'nano' to the value NANOSEC (one billion). 297 * 298 * This macro assumes that all registers are globals or outs so they can 299 * safely contain 64-bit data, and that it's safe to use the label "5:". 300 * Further, this macro calls the NATIVE_TIME_TO_NSEC_SCALE which in turn 301 * uses the labels "6:" and "7:"; labels "5:", "6:" and "7:" must not 302 * be used across invocations of this macro. 303 */ 304 #define GET_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano, scr, hrlock, \ 305 gnt1, gnt2) \ 306 5: sethi %hi(hres_lock), scr; \ 307 lduw [scr + %lo(hres_lock)], hrlock; /* load clock lock */ \ 308 lduw [scr + %lo(nsec_scale)], nano; /* tick-to-ns factor */ \ 309 andn hrlock, 1, hrlock; /* see comments above! 
*/ \ 310 ldx [scr + %lo(hres_last_tick)], nslt; \ 311 ldn [scr + %lo(hrestime)], hrestsec; /* load hrestime.sec */\ 312 add scr, %lo(hrestime), hrestnsec; \ 313 ldn [hrestnsec + CLONGSIZE], hrestnsec; \ 314 GET_NATIVE_TIME(adj, gnt1, gnt2); /* get current %tick */ \ 315 subcc adj, nslt, nslt; /* nslt = ticks since last clockint */ \ 316 movneg %xcc, %g0, nslt; /* ignore neg delta from tick skew */ \ 317 ldx [scr + %lo(hrestime_adj)], adj; /* load hrestime_adj */ \ 318 /* membar #LoadLoad; (see comment (2) above) */ \ 319 lduw [scr + %lo(hres_lock)], scr; /* load clock lock */ \ 320 NATIVE_TIME_TO_NSEC_SCALE(nslt, nano, gnt1, NSEC_SHIFT); \ 321 sethi %hi(NANOSEC), nano; \ 322 xor hrlock, scr, scr; \ 323 /* CSTYLED */ \ 324 brnz,pn scr, 5b; \ 325 or nano, %lo(NANOSEC), nano; 326 327 /* 328 * Similar to above, but returns current gethrtime() value in 'base'. 329 */ 330 #define GET_HRTIME(base, now, nslt, scale, scr, hrlock, gnt1, gnt2) \ 331 5: sethi %hi(hres_lock), scr; \ 332 lduw [scr + %lo(hres_lock)], hrlock; /* load clock lock */ \ 333 lduw [scr + %lo(nsec_scale)], scale; /* tick-to-ns factor */ \ 334 andn hrlock, 1, hrlock; /* see comments above! */ \ 335 ldx [scr + %lo(hres_last_tick)], nslt; \ 336 ldx [scr + %lo(hrtime_base)], base; /* load hrtime_base */ \ 337 GET_NATIVE_TIME(now, gnt1, gnt2); /* get current %tick */ \ 338 subcc now, nslt, nslt; /* nslt = ticks since last clockint */ \ 339 movneg %xcc, %g0, nslt; /* ignore neg delta from tick skew */ \ 340 /* membar #LoadLoad; (see comment (2) above) */ \ 341 ld [scr + %lo(hres_lock)], scr; /* load clock lock */ \ 342 NATIVE_TIME_TO_NSEC_SCALE(nslt, scale, gnt1, NSEC_SHIFT); \ 343 xor hrlock, scr, scr; \ 344 /* CSTYLED */ \ 345 brnz,pn scr, 5b; \ 346 add base, nslt, base; 347 348 /* 349 * Maximum-performance timestamp for kernel tracing. We don't bother 350 * clearing NPT because vtrace expresses everything in 32-bit deltas, 351 * so only the low-order 32 bits matter. 
We do shift down a few bits, 352 * however, so that the trace framework doesn't emit a ridiculous number 353 * of 32_bit_elapsed_time records (trace points are more expensive when 354 * the time since the last trace point doesn't fit in a 16-bit delta). 355 * We currently shift by 4 (divide by 16) on the grounds that (1) there's 356 * no point making the timing finer-grained than the trace point latency, 357 * which exceeds 16 cycles; and (2) the cost and probe effect of many 358 * 32-bit time records far exceeds the cost of the 'srlx' instruction. 359 */ 360 #define GET_VTRACE_TIME(out, scr1, scr2) \ 361 GET_NATIVE_TIME(out, scr1, scr2); /* get current %tick */ \ 362 srlx out, VTRACE_SHIFT, out; 363 364 /* 365 * Full 64-bit version for those truly rare occasions when you need it. 366 * Currently this is only needed to generate the TR_START_TIME record. 367 */ 368 #define GET_VTRACE_TIME_64(out, scr1, scr2) \ 369 GET_NATIVE_TIME(out, scr1, scr2); /* get current %tick */ \ 370 add out, out, out; \ 371 srlx out, VTRACE_SHIFT + 1, out; 372 373 /* 374 * Return the rate at which the vtrace clock runs. 375 */ 376 #define GET_VTRACE_FREQUENCY(out, scr1, scr2) \ 377 sethi %hi(sys_tick_freq), out; \ 378 ldx [out + %lo(sys_tick_freq)], out; \ 379 srlx out, VTRACE_SHIFT, out; 380 381 #endif /* CPU_MODULE */ 382 383 #ifdef __cplusplus 384 } 385 #endif 386 387 #endif /* !_SYS_CLOCK_H */ 388