1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef _SYS_CLOCK_H 28 #define _SYS_CLOCK_H 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #ifdef __cplusplus 33 extern "C" { 34 #endif 35 36 #include <sys/spl.h> 37 #include <sys/time.h> 38 #include <sys/machclock.h> 39 40 #ifndef _ASM 41 42 #ifdef _KERNEL 43 44 extern void setcpudelay(void); 45 46 extern uint_t nsec_scale; 47 extern uint_t nsec_shift; 48 extern uint_t nsec_per_sys_tick; 49 extern uint64_t sys_tick_freq; 50 51 extern int traptrace_use_stick; 52 extern uint64_t system_clock_freq; 53 extern uint_t sys_clock_mhz; 54 55 extern void mon_clock_init(void); 56 extern void mon_clock_start(void); 57 extern void mon_clock_stop(void); 58 extern void mon_clock_share(void); 59 extern void mon_clock_unshare(void); 60 61 extern hrtime_t hrtime_base; 62 extern void hres_tick(void); 63 extern void clkstart(void); 64 extern void cbe_level14(); 65 extern hrtime_t tick2ns(hrtime_t, uint_t); 66 67 typedef struct { 68 uint32_t cbe_level1_inum; 69 uint32_t cbe_level10_inum; 70 } cbe_data_t; 71 
72 #endif /* _KERNEL */ 73 74 #endif /* _ASM */ 75 76 77 #define CBE_LOW_PIL 1 78 #define CBE_LOCK_PIL LOCK_LEVEL 79 #define CBE_HIGH_PIL 14 80 81 #define ADJ_SHIFT 4 /* used in get_hrestime and _level10 */ 82 83 /* 84 * Locking strategy for high-resolution timing services 85 * 86 * We generally construct timestamps from two or more components: 87 * a hardware time source and one or more software time sources. 88 * These components cannot all be loaded simultaneously, so we need 89 * some sort of locking strategy to generate consistent timestamps. 90 * 91 * To minimize lock contention and cache thrashing we employ the 92 * weakest possible synchronization model: writers (rare) serialize 93 * on an acquisition-counting mutex, described below; readers (common) 94 * execute in parallel with no synchronization at all -- they don't 95 * exclude other readers, and they don't even exclude writers. Instead, 96 * readers just examine the writer lock's value before and after loading 97 * all the components of a timestamp to detect writer intervention. 98 * In the rare case when a writer does intervene, the reader will 99 * detect it, discard the timestamp and try again. 100 * 101 * The writer lock, hres_lock, is a 32-bit integer consisting of an 102 * 8-bit lock and a 24-bit acquisition count. To acquire the lock we 103 * set the lock field with ldstub, which sets the low-order 8 bits to 104 * 0xff; to clear the lock, we increment it, which simultaneously clears 105 * the lock field (0xff --> 0x00) and increments the acquisition count 106 * (due to carry into bit 8). Thus each acquisition transforms hres_lock 107 * from N:0 to N:ff, and each release transforms N:ff into (N+1):0. 
 *
 * Readers can detect writer intervention by loading hres_lock before
 * and after loading the time components they need; if either lock value
 * contains 0xff in the low-order bits (lock held), or if the lock values
 * are not equal (lock was acquired and released), a writer intervened
 * and the reader must try again. If the lock values are equal and the
 * low-order 8 bits are clear, the timestamp must be valid. We can check
 * both of these conditions with a single compare instruction by checking
 * whether (old_hres_lock & ~1) == new_hres_lock, as illustrated by the
 * following table of all possible lock states:
 *
 *	initial & ~1		final		result of compare
 *	------------		-----		-----------------
 *	now:00			now:00		valid
 *	now:00			now:ff		invalid
 *	now:00			later:00	invalid
 *	now:00			later:ff	invalid
 *	now:fe			now:ff		invalid
 *	now:fe			later:00	invalid
 *	now:fe			later:ff	invalid
 *
 * Implementation considerations:
 *
 * (1) Load buffering.
 *
 * On a CPU that does load buffering we must ensure that the load of
 * hres_lock completes before the load of any timestamp components.
 * This is essential *even on a CPU that does in-order loads* because
 * accessing the hardware time source may not involve a memory reference
 * (e.g. rd %tick). A convenient way to address this is to clear the
 * lower bit (andn with 1) of the old lock value right away, since this
 * generates a dependency on the load of hres_lock. We have to do this
 * anyway to perform the lock comparison described above.
 *
 * (2) Out-of-order loads.
 *
 * On a CPU that does out-of-order loads we must ensure that the loads
 * of all timestamp components have completed before we load the final
 * value of hres_lock. This can be done either by generating load
 * dependencies on the timestamp components or by membar #LoadLoad.
148 * 149 * (3) Interaction with the high level cyclic handler, hres_tick(). 150 * 151 * One unusual property of hres_lock is that it's acquired in a high 152 * level cyclic handler, hres_tick(). Thus, hres_lock must be acquired at 153 * CBE_HIGH_PIL or higher to prevent single-CPU deadlock. 154 * 155 * (4) Cross-calls. 156 * 157 * If a cross-call happens while one CPU has hres_lock and another is 158 * trying to acquire it in the clock interrupt path, the system will 159 * deadlock: the first CPU will never release hres_lock since it's 160 * waiting to be released from the cross-call, and the cross-call can't 161 * complete because the second CPU is spinning on hres_lock with traps 162 * disabled. Thus cross-calls must be blocked while holding hres_lock. 163 * 164 * Together, (3) and (4) imply that hres_lock should only be acquired 165 * at PIL >= max(XCALL_PIL, CBE_HIGH_PIL), or while traps are disabled. 166 */ 167 #define HRES_LOCK_OFFSET 3 168 169 #define CLOCK_LOCK(oldsplp) \ 170 lock_set_spl((lock_t *)&hres_lock + HRES_LOCK_OFFSET, \ 171 ipltospl(CBE_HIGH_PIL), oldsplp) 172 173 #define CLOCK_UNLOCK(spl) \ 174 membar_ldst_stst(); \ 175 hres_lock++; \ 176 splx(spl); \ 177 LOCKSTAT_RECORD0(LS_CLOCK_UNLOCK_RELEASE, \ 178 (lock_t *)&hres_lock + HRES_LOCK_OFFSET); 179 180 /* 181 * NATIVE_TIME_TO_NSEC_SCALE is called with NSEC_SHIFT to convert hi-res 182 * timestamps into nanoseconds. On systems that have a %stick register, 183 * hi-res timestamps are in %stick units. On systems that do not have a 184 * %stick register, hi-res timestamps are in %tick units. 185 * 186 * NATIVE_TIME_TO_NSEC_SCALE is called with TICK_NSEC_SHIFT to convert from 187 * %tick units to nanoseconds on all implementations whether %stick is 188 * available or not. 189 */ 190 191 /* 192 * At least 62.5 MHz CPU %tick frequency 193 */ 194 195 #define TICK_NSEC_SHIFT 4 196 197 /* 198 * Convert hi-res native time (V9's %tick in our case) into nanoseconds. 
 *
 * The challenge is to multiply a %tick value by (NANOSEC / sys_tick_freq)
 * without using floating point and without overflowing 64-bit integers.
 * We assume that all sun4u systems will have a 16 nsec or better clock
 * (i.e. faster than 62.5 MHz), which means that (ticks << 4) has units
 * smaller than one nanosecond, so converting from (ticks << 4) to nsec
 * requires multiplication by a rational number, R, between 0 and 1.
 * To avoid floating-point we precompute (R * 2^32) during boot and
 * stash this away in nsec_scale. Thus we can compute (tick * R) as
 * (tick * nsec_scale) >> 32, which is accurate to about 1 part per billion.
 *
 * To avoid 64-bit overflow when multiplying (tick << 4) by nsec_scale,
 * we split (tick << 4) into its high and low 32-bit pieces, H and L,
 * multiply each piece separately, and add up the relevant bits of the
 * partial products. Putting it all together we have:
 *
 * nsec = (tick << 4) * R
 *	= ((tick << 4) * nsec_scale) >> 32
 *	= (((H << 32) + L) * nsec_scale) >> 32
 *	= (H * nsec_scale) + ((L * nsec_scale) >> 32)
 *
 * The last line is the computation we actually perform: it requires no
 * floating point and all intermediate results fit in 64-bit registers.
 *
 * Note that we require that tick is less than (1 << (64 - NSEC_SHIFT));
 * greater values will result in overflow and misbehavior (not that this
 * is a serious problem; (1 << (64 - NSEC_SHIFT)) nanoseconds is over
 * thirty-six years). Nonetheless, clients may wish to be aware of this
 * limitation; NATIVE_TIME_MAX() returns this maximum native time.
 *
 * We provide two versions of this macro: a "full-service" version that
 * just converts ticks to nanoseconds and a higher-performance version that
 * expects the scaling factor nsec_scale as its second argument (so that
 * callers can distance the load of nsec_scale from its use).
Note that 233 * we take a fast path if we determine the ticks to be less than 32 bits 234 * (as it often is for the delta between %tick values for successive 235 * firings of the hres_tick() cyclic). 236 * 237 * Note that in the 32-bit path we don't even bother clearing NPT. 238 * We get away with this by making hardclk.c ensure than nsec_scale 239 * is even, so we can take advantage of the associativity of modular 240 * arithmetic: multiplying %tick by any even number, say 2*n, is 241 * equivalent to multiplying %tick by 2, then by n. Multiplication 242 * by 2 is equivalent to shifting left by one, which clears NPT. 243 * 244 * Finally, note that the macros use the labels "6:" and "7:"; these 245 * labels must not be used across an invocation of either macro. 246 */ 247 #define NATIVE_TIME_TO_NSEC_SCALE(out, scr1, scr2, shift) \ 248 srlx out, 32, scr2; /* check high 32 bits */ \ 249 /* CSTYLED */ \ 250 brz,a,pt scr2, 6f; /* if clear, 32-bit fast path */\ 251 mulx out, scr1, out; /* delay: 32-bit fast path */ \ 252 sllx out, shift, out; /* clear NPT and pre-scale */ \ 253 srlx out, 32, scr2; /* scr2 = hi32(tick<<4) = H */ \ 254 mulx scr2, scr1, scr2; /* scr2 = (H*F) */ \ 255 srl out, 0, out; /* out = lo32(tick<<4) = L */ \ 256 mulx out, scr1, scr1; /* scr1 = (L*F) */ \ 257 srlx scr1, 32, scr1; /* scr1 = (L*F) >> 32 */ \ 258 ba 7f; /* branch over 32-bit path */ \ 259 add scr1, scr2, out; /* out = (H*F) + ((L*F) >> 32) */\ 260 6: \ 261 srlx out, 32 - shift, out; \ 262 7: 263 264 #define NATIVE_TIME_TO_NSEC(out, scr1, scr2) \ 265 sethi %hi(nsec_scale), scr1; /* load scaling factor */ \ 266 ld [scr1 + %lo(nsec_scale)], scr1; \ 267 NATIVE_TIME_TO_NSEC_SCALE(out, scr1, scr2, NSEC_SHIFT); 268 269 #define NATIVE_TIME_MAX(out) \ 270 mov -1, out; \ 271 srlx out, NSEC_SHIFT, out 272 273 274 /* 275 * The following macros are only for use in the cpu module. 
276 */ 277 #if defined(CPU_MODULE) 278 279 /* 280 * NSEC_SHIFT and VTRACE_SHIFT constants are defined in 281 * <sys/machclock.h> file. 282 */ 283 284 285 /* 286 * NOTE: the macros below assume that the various time-related variables 287 * (hrestime, hrestime_adj, hres_last_tick, timedelta, nsec_scale, etc) 288 * are all stored together on a 64-byte boundary. The primary motivation 289 * is cache performance, but we also take advantage of a convenient side 290 * effect: these variables all have the same high 22 address bits, so only 291 * one sethi is needed to access them all. 292 */ 293 294 /* 295 * GET_HRESTIME() returns the value of hrestime, hrestime_adj and the 296 * number of nanoseconds since the last clock tick ('nslt'). It also 297 * sets 'nano' to the value NANOSEC (one billion). 298 * 299 * This macro assumes that all registers are globals or outs so they can 300 * safely contain 64-bit data, and that it's safe to use the label "5:". 301 * Further, this macro calls the NATIVE_TIME_TO_NSEC_SCALE which in turn 302 * uses the labels "6:" and "7:"; labels "5:", "6:" and "7:" must not 303 * be used across invocations of this macro. 304 */ 305 #define GET_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano, scr, hrlock, \ 306 gnt1, gnt2) \ 307 5: sethi %hi(hres_lock), scr; \ 308 lduw [scr + %lo(hres_lock)], hrlock; /* load clock lock */ \ 309 lduw [scr + %lo(nsec_scale)], nano; /* tick-to-ns factor */ \ 310 andn hrlock, 1, hrlock; /* see comments above! 
*/ \ 311 ldx [scr + %lo(hres_last_tick)], nslt; \ 312 ldn [scr + %lo(hrestime)], hrestsec; /* load hrestime.sec */\ 313 add scr, %lo(hrestime), hrestnsec; \ 314 ldn [hrestnsec + CLONGSIZE], hrestnsec; \ 315 GET_NATIVE_TIME(adj, gnt1, gnt2); /* get current %tick */ \ 316 subcc adj, nslt, nslt; /* nslt = ticks since last clockint */ \ 317 movneg %xcc, %g0, nslt; /* ignore neg delta from tick skew */ \ 318 ldx [scr + %lo(hrestime_adj)], adj; /* load hrestime_adj */ \ 319 /* membar #LoadLoad; (see comment (2) above) */ \ 320 lduw [scr + %lo(hres_lock)], scr; /* load clock lock */ \ 321 NATIVE_TIME_TO_NSEC_SCALE(nslt, nano, gnt1, NSEC_SHIFT); \ 322 sethi %hi(NANOSEC), nano; \ 323 xor hrlock, scr, scr; \ 324 /* CSTYLED */ \ 325 brnz,pn scr, 5b; \ 326 or nano, %lo(NANOSEC), nano; 327 328 /* 329 * Similar to above, but returns current gethrtime() value in 'base'. 330 */ 331 #define GET_HRTIME(base, now, nslt, scale, scr, hrlock, gnt1, gnt2) \ 332 5: sethi %hi(hres_lock), scr; \ 333 lduw [scr + %lo(hres_lock)], hrlock; /* load clock lock */ \ 334 lduw [scr + %lo(nsec_scale)], scale; /* tick-to-ns factor */ \ 335 andn hrlock, 1, hrlock; /* see comments above! */ \ 336 ldx [scr + %lo(hres_last_tick)], nslt; \ 337 ldx [scr + %lo(hrtime_base)], base; /* load hrtime_base */ \ 338 GET_NATIVE_TIME(now, gnt1, gnt2); /* get current %tick */ \ 339 subcc now, nslt, nslt; /* nslt = ticks since last clockint */ \ 340 movneg %xcc, %g0, nslt; /* ignore neg delta from tick skew */ \ 341 /* membar #LoadLoad; (see comment (2) above) */ \ 342 ld [scr + %lo(hres_lock)], scr; /* load clock lock */ \ 343 NATIVE_TIME_TO_NSEC_SCALE(nslt, scale, gnt1, NSEC_SHIFT); \ 344 xor hrlock, scr, scr; \ 345 /* CSTYLED */ \ 346 brnz,pn scr, 5b; \ 347 add base, nslt, base; 348 349 /* 350 * Maximum-performance timestamp for kernel tracing. We don't bother 351 * clearing NPT because vtrace expresses everything in 32-bit deltas, 352 * so only the low-order 32 bits matter. 
We do shift down a few bits, 353 * however, so that the trace framework doesn't emit a ridiculous number 354 * of 32_bit_elapsed_time records (trace points are more expensive when 355 * the time since the last trace point doesn't fit in a 16-bit delta). 356 * We currently shift by 4 (divide by 16) on the grounds that (1) there's 357 * no point making the timing finer-grained than the trace point latency, 358 * which exceeds 16 cycles; and (2) the cost and probe effect of many 359 * 32-bit time records far exceeds the cost of the 'srlx' instruction. 360 */ 361 #define GET_VTRACE_TIME(out, scr1, scr2) \ 362 GET_NATIVE_TIME(out, scr1, scr2); /* get current %tick */ \ 363 srlx out, VTRACE_SHIFT, out; 364 365 /* 366 * Full 64-bit version for those truly rare occasions when you need it. 367 * Currently this is only needed to generate the TR_START_TIME record. 368 */ 369 #define GET_VTRACE_TIME_64(out, scr1, scr2) \ 370 GET_NATIVE_TIME(out, scr1, scr2); /* get current %tick */ \ 371 add out, out, out; \ 372 srlx out, VTRACE_SHIFT + 1, out; 373 374 /* 375 * Return the rate at which the vtrace clock runs. 376 */ 377 #define GET_VTRACE_FREQUENCY(out, scr1, scr2) \ 378 sethi %hi(sys_tick_freq), out; \ 379 ldx [out + %lo(sys_tick_freq)], out; \ 380 srlx out, VTRACE_SHIFT, out; 381 382 #endif /* CPU_MODULE */ 383 384 #ifdef __cplusplus 385 } 386 #endif 387 388 #endif /* !_SYS_CLOCK_H */ 389