1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Kernel timekeeping code and accessor functions. Based on code from 4 * timer.c, moved in commit 8524070b7982. 5 */ 6 #include <linux/audit.h> 7 #include <linux/clocksource.h> 8 #include <linux/compiler.h> 9 #include <linux/jiffies.h> 10 #include <linux/kobject.h> 11 #include <linux/module.h> 12 #include <linux/nmi.h> 13 #include <linux/pvclock_gtod.h> 14 #include <linux/random.h> 15 #include <linux/sched/clock.h> 16 #include <linux/sched/loadavg.h> 17 #include <linux/static_key.h> 18 #include <linux/stop_machine.h> 19 #include <linux/syscore_ops.h> 20 #include <linux/tick.h> 21 #include <linux/time.h> 22 #include <linux/timex.h> 23 #include <linux/timekeeper_internal.h> 24 25 #include <vdso/auxclock.h> 26 27 #include "tick-internal.h" 28 #include "timekeeping_internal.h" 29 #include "ntp_internal.h" 30 31 #define TK_CLEAR_NTP (1 << 0) 32 #define TK_CLOCK_WAS_SET (1 << 1) 33 34 #define TK_UPDATE_ALL (TK_CLEAR_NTP | TK_CLOCK_WAS_SET) 35 36 enum timekeeping_adv_mode { 37 /* Update timekeeper when a tick has passed */ 38 TK_ADV_TICK, 39 40 /* Update timekeeper on a direct frequency change */ 41 TK_ADV_FREQ 42 }; 43 44 /* 45 * The most important data for readout fits into a single 64 byte 46 * cache line. 
47 */ 48 struct tk_data { 49 seqcount_raw_spinlock_t seq; 50 struct timekeeper timekeeper; 51 struct timekeeper shadow_timekeeper; 52 raw_spinlock_t lock; 53 } ____cacheline_aligned; 54 55 static struct tk_data timekeeper_data[TIMEKEEPERS_MAX]; 56 57 /* The core timekeeper */ 58 #define tk_core (timekeeper_data[TIMEKEEPER_CORE]) 59 60 #ifdef CONFIG_POSIX_AUX_CLOCKS 61 static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts) 62 { 63 return ktime_get_aux_ts64(CLOCK_AUX + tkid - TIMEKEEPER_AUX_FIRST, ts); 64 } 65 66 static inline bool tk_is_aux(const struct timekeeper *tk) 67 { 68 return tk->id >= TIMEKEEPER_AUX_FIRST && tk->id <= TIMEKEEPER_AUX_LAST; 69 } 70 static inline struct tk_data *aux_get_tk_data(clockid_t id); 71 #else 72 static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts) 73 { 74 return false; 75 } 76 77 static inline bool tk_is_aux(const struct timekeeper *tk) 78 { 79 return false; 80 } 81 static inline struct tk_data *aux_get_tk_data(clockid_t id) 82 { 83 return NULL; 84 } 85 #endif 86 87 static inline void tk_update_aux_offs(struct timekeeper *tk, ktime_t offs) 88 { 89 tk->offs_aux = offs; 90 tk->monotonic_to_aux = ktime_to_timespec64(offs); 91 } 92 93 /* flag for if timekeeping is suspended */ 94 int __read_mostly timekeeping_suspended; 95 96 /** 97 * struct tk_fast - NMI safe timekeeper 98 * @seq: Sequence counter for protecting updates. The lowest bit 99 * is the index for the tk_read_base array 100 * @base: tk_read_base array. Access is indexed by the lowest bit of 101 * @seq. 102 * 103 * See @update_fast_timekeeper() below. 104 */ 105 struct tk_fast { 106 seqcount_latch_t seq; 107 struct tk_read_base base[2]; 108 }; 109 110 /* Suspend-time cycles value for halted fast timekeeper. 
*/ 111 static u64 cycles_at_suspend; 112 113 static u64 dummy_clock_read(struct clocksource *cs) 114 { 115 if (timekeeping_suspended) 116 return cycles_at_suspend; 117 return local_clock(); 118 } 119 120 static struct clocksource dummy_clock = { 121 .read = dummy_clock_read, 122 }; 123 124 /* 125 * Boot time initialization which allows local_clock() to be utilized 126 * during early boot when clocksources are not available. local_clock() 127 * returns nanoseconds already so no conversion is required, hence mult=1 128 * and shift=0. When the first proper clocksource is installed then 129 * the fast time keepers are updated with the correct values. 130 */ 131 #define FAST_TK_INIT \ 132 { \ 133 .clock = &dummy_clock, \ 134 .mask = CLOCKSOURCE_MASK(64), \ 135 .mult = 1, \ 136 .shift = 0, \ 137 } 138 139 static struct tk_fast tk_fast_mono ____cacheline_aligned = { 140 .seq = SEQCNT_LATCH_ZERO(tk_fast_mono.seq), 141 .base[0] = FAST_TK_INIT, 142 .base[1] = FAST_TK_INIT, 143 }; 144 145 static struct tk_fast tk_fast_raw ____cacheline_aligned = { 146 .seq = SEQCNT_LATCH_ZERO(tk_fast_raw.seq), 147 .base[0] = FAST_TK_INIT, 148 .base[1] = FAST_TK_INIT, 149 }; 150 151 #ifdef CONFIG_POSIX_AUX_CLOCKS 152 static __init void tk_aux_setup(void); 153 static void tk_aux_update_clocksource(void); 154 static void tk_aux_advance(void); 155 #else 156 static inline void tk_aux_setup(void) { } 157 static inline void tk_aux_update_clocksource(void) { } 158 static inline void tk_aux_advance(void) { } 159 #endif 160 161 unsigned long timekeeper_lock_irqsave(void) 162 { 163 unsigned long flags; 164 165 raw_spin_lock_irqsave(&tk_core.lock, flags); 166 return flags; 167 } 168 169 void timekeeper_unlock_irqrestore(unsigned long flags) 170 { 171 raw_spin_unlock_irqrestore(&tk_core.lock, flags); 172 } 173 174 /* 175 * Multigrain timestamps require tracking the latest fine-grained timestamp 176 * that has been issued, and never returning a coarse-grained timestamp that is 177 * earlier than that 
value. 178 * 179 * mg_floor represents the latest fine-grained time that has been handed out as 180 * a file timestamp on the system. This is tracked as a monotonic ktime_t, and 181 * converted to a realtime clock value on an as-needed basis. 182 * 183 * Maintaining mg_floor ensures the multigrain interfaces never issue a 184 * timestamp earlier than one that has been previously issued. 185 * 186 * The exception to this rule is when there is a backward realtime clock jump. If 187 * such an event occurs, a timestamp can appear to be earlier than a previous one. 188 */ 189 static __cacheline_aligned_in_smp atomic64_t mg_floor; 190 191 static inline void tk_normalize_xtime(struct timekeeper *tk) 192 { 193 while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) { 194 tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift; 195 tk->xtime_sec++; 196 } 197 while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) { 198 tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift; 199 tk->raw_sec++; 200 } 201 } 202 203 static inline struct timespec64 tk_xtime(const struct timekeeper *tk) 204 { 205 struct timespec64 ts; 206 207 ts.tv_sec = tk->xtime_sec; 208 ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); 209 return ts; 210 } 211 212 static inline struct timespec64 tk_xtime_coarse(const struct timekeeper *tk) 213 { 214 struct timespec64 ts; 215 216 ts.tv_sec = tk->xtime_sec; 217 ts.tv_nsec = tk->coarse_nsec; 218 return ts; 219 } 220 221 /* 222 * Update the nanoseconds part for the coarse time keepers. They can't rely 223 * on xtime_nsec because xtime_nsec could be adjusted by a small negative 224 * amount when the multiplication factor of the clock is adjusted, which 225 * could cause the coarse clocks to go slightly backwards. See 226 * timekeeping_apply_adjustment(). 
Thus we keep a separate copy for the coarse 227 * clockids which only is updated when the clock has been set or we have 228 * accumulated time. 229 */ 230 static inline void tk_update_coarse_nsecs(struct timekeeper *tk) 231 { 232 tk->coarse_nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; 233 } 234 235 static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) 236 { 237 tk->xtime_sec = ts->tv_sec; 238 tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift; 239 tk_update_coarse_nsecs(tk); 240 } 241 242 static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) 243 { 244 tk->xtime_sec += ts->tv_sec; 245 tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift; 246 tk_normalize_xtime(tk); 247 tk_update_coarse_nsecs(tk); 248 } 249 250 static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm) 251 { 252 struct timespec64 tmp; 253 254 /* 255 * Verify consistency of: offset_real = -wall_to_monotonic 256 * before modifying anything 257 */ 258 set_normalized_timespec64(&tmp, -tk->wall_to_monotonic.tv_sec, 259 -tk->wall_to_monotonic.tv_nsec); 260 WARN_ON_ONCE(tk->offs_real != timespec64_to_ktime(tmp)); 261 tk->wall_to_monotonic = wtm; 262 set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec); 263 /* Paired with READ_ONCE() in ktime_mono_to_any() */ 264 WRITE_ONCE(tk->offs_real, timespec64_to_ktime(tmp)); 265 WRITE_ONCE(tk->offs_tai, ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0))); 266 } 267 268 static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) 269 { 270 /* Paired with READ_ONCE() in ktime_mono_to_any() */ 271 WRITE_ONCE(tk->offs_boot, ktime_add(tk->offs_boot, delta)); 272 /* 273 * Timespec representation for VDSO update to avoid 64bit division 274 * on every update. 
275 */ 276 tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot); 277 } 278 279 #ifdef CONFIG_ARCH_WANTS_CLOCKSOURCE_READ_INLINE 280 #include <asm/clock_inlined.h> 281 282 static DEFINE_STATIC_KEY_FALSE(clocksource_read_inlined); 283 284 /* 285 * tk_clock_read - atomic clocksource read() helper 286 * 287 * This helper is necessary to use in the read paths because, while the 288 * seqcount ensures we don't return a bad value while structures are updated, 289 * it doesn't protect from potential crashes. There is the possibility that 290 * the tkr's clocksource may change between the read reference, and the 291 * clock reference passed to the read function. This can cause crashes if 292 * the wrong clocksource is passed to the wrong read function. 293 * This isn't necessary to use when holding the tk_core.lock or doing 294 * a read of the fast-timekeeper tkrs (which is protected by its own locking 295 * and update logic). 296 */ 297 static __always_inline u64 tk_clock_read(const struct tk_read_base *tkr) 298 { 299 struct clocksource *clock = READ_ONCE(tkr->clock); 300 301 if (static_branch_likely(&clocksource_read_inlined)) 302 return arch_inlined_clocksource_read(clock); 303 304 return clock->read(clock); 305 } 306 307 static inline void clocksource_disable_inline_read(void) 308 { 309 static_branch_disable(&clocksource_read_inlined); 310 } 311 312 static inline void clocksource_enable_inline_read(void) 313 { 314 static_branch_enable(&clocksource_read_inlined); 315 } 316 #else 317 static __always_inline u64 tk_clock_read(const struct tk_read_base *tkr) 318 { 319 struct clocksource *clock = READ_ONCE(tkr->clock); 320 321 return clock->read(clock); 322 } 323 324 static inline void clocksource_disable_inline_read(void) { } 325 static inline void clocksource_enable_inline_read(void) { } 326 #endif 327 328 /** 329 * tk_setup_internals - Set up internals to use clocksource clock. 330 * 331 * @tk: The target timekeeper to setup. 332 * @clock: Pointer to clocksource. 
*
 * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
 * pair and interval request.
 *
 * Unless you're the timekeeping code, you should not be using this!
 */
static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
{
	u64 interval;
	u64 tmp, ntpinterval;
	struct clocksource *old_clock;

	++tk->cs_was_changed_seq;
	old_clock = tk->tkr_mono.clock;
	/* The new clock must be installed before it is read via tk_clock_read() */
	tk->tkr_mono.clock = clock;
	tk->tkr_mono.mask = clock->mask;
	tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);

	/* The raw readout base shares clock, mask and cycle_last with mono */
	tk->tkr_raw.clock = clock;
	tk->tkr_raw.mask = clock->mask;
	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;

	/* Do the ns -> cycle conversion first, using original mult */
	tmp = NTP_INTERVAL_LENGTH;
	tmp <<= clock->shift;
	ntpinterval = tmp;
	/* Round to the nearest cycle count, but never below one cycle */
	tmp += clock->mult/2;
	do_div(tmp, clock->mult);
	if (tmp == 0)
		tmp = 1;

	interval = (u64) tmp;
	tk->cycle_interval = interval;

	/* Go back from cycles -> shifted ns */
	tk->xtime_interval = interval * clock->mult;
	tk->xtime_remainder = ntpinterval - tk->xtime_interval;
	tk->raw_interval = interval * clock->mult;

	/* If changing clocks, convert xtime_nsec shift units */
	if (old_clock) {
		int shift_change = clock->shift - old_clock->shift;
		if (shift_change < 0) {
			tk->tkr_mono.xtime_nsec >>= -shift_change;
			tk->tkr_raw.xtime_nsec >>= -shift_change;
		} else {
			tk->tkr_mono.xtime_nsec <<= shift_change;
			tk->tkr_raw.xtime_nsec <<= shift_change;
		}
	}

	tk->tkr_mono.shift = clock->shift;
	tk->tkr_raw.shift = clock->shift;

	tk->ntp_error = 0;
	tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
	tk->ntp_tick = ntpinterval << tk->ntp_error_shift;

	/*
	 * The timekeeper keeps its own mult values for the currently
	 * active clocksource. These values will be adjusted via NTP
	 * to counteract clock drifting.
	 */
	tk->tkr_mono.mult = clock->mult;
	tk->tkr_raw.mult = clock->mult;
	tk->ntp_err_mult = 0;
	tk->skip_second_overflow = 0;

	tk->cs_id = clock->id;

	/* Coupled clockevent data */
	if (IS_ENABLED(CONFIG_GENERIC_CLOCKEVENTS_COUPLED) &&
	    clock->flags & CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT) {
		/*
		 * Aim for a one hour maximum delta and use KHz to handle
		 * clocksources with a frequency above 4GHz correctly as
		 * the frequency argument of clocks_calc_mult_shift() is u32.
		 */
		clocks_calc_mult_shift(&tk->cs_ns_to_cyc_mult, &tk->cs_ns_to_cyc_shift,
				       NSEC_PER_MSEC, clock->freq_khz, 3600 * 1000);
		/*
		 * Initialize the conversion limit as the previous clocksource
		 * might have the same shift/mult pair so the quick check in
		 * tk_update_ns_to_cyc() fails to update it after a clocksource
		 * change leaving it effectively zero.
		 */
		tk->cs_ns_to_cyc_maxns = div_u64(clock->mask, tk->cs_ns_to_cyc_mult);
	}
}

/* Timekeeper helper functions. */

/*
 * Out of line conversion for deltas above clock->max_cycles:
 * (delta * mult + xtime_nsec) >> shift, computed via
 * mul_u64_u32_add_u64_shr().
 */
static noinline u64 delta_to_ns_safe(const struct tk_read_base *tkr, u64 delta)
{
	return mul_u64_u32_add_u64_shr(delta, tkr->mult, tkr->xtime_nsec, tkr->shift);
}

/*
 * Convert the clocksource readout @cycles into nanoseconds relative to the
 * readout base @tkr: ((cycles - cycle_last) & mask) * mult + xtime_nsec,
 * shifted down by tkr->shift.
 */
static __always_inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
{
	/* Calculate the delta since the last update_wall_time() */
	u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask;

	/*
	 * This detects both negative motion and the case where the delta
	 * overflows the multiplication with tkr->mult.
	 */
	if (unlikely(delta > tkr->clock->max_cycles)) {
		/*
		 * Handle clocksource inconsistency between CPUs to prevent
		 * time from going backwards by checking for the MSB of the
		 * mask being set in the delta.
		 */
		if (delta & ~(mask >> 1))
			/* Treated as no progress: only the accumulated nsecs */
			return tkr->xtime_nsec >> tkr->shift;

		return delta_to_ns_safe(tkr, delta);
	}

	return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift;
}

/* Read the clocksource of @tkr and convert the readout to nanoseconds */
static __always_inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
{
	return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr));
}

/**
 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
 * @tkr: Timekeeping readout base from which we take the update
 * @tkf: Pointer to NMI safe timekeeper
 *
 * We want to use this from any context including NMI and tracing /
 * instrumenting the timekeeping code itself.
 *
 * Employ the latch technique; see @write_seqcount_latch.
 *
 * So if a NMI hits the update of base[0] then it will use base[1]
 * which is still consistent. In the worst case this can result in a
 * slightly wrong timestamp (a few nanoseconds). See
 * @ktime_get_mono_fast_ns.
 */
static void update_fast_timekeeper(const struct tk_read_base *tkr,
				   struct tk_fast *tkf)
{
	struct tk_read_base *base = tkf->base;

	/* Force readers off to base[1] */
	write_seqcount_latch_begin(&tkf->seq);

	/* Update base[0] */
	memcpy(base, tkr, sizeof(*base));

	/* Force readers back to base[0] */
	write_seqcount_latch(&tkf->seq);

	/* Update base[1] */
	memcpy(base + 1, base, sizeof(*base));

	write_seqcount_latch_end(&tkf->seq);
}

/*
 * Lockless readout of the fast timekeeper @tkf: the base nanoseconds of the
 * currently valid readout base plus the nanoseconds since its last update.
 * Retried when the latch sequence changed during the readout.
 */
static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
{
	struct tk_read_base *tkr;
	unsigned int seq;
	u64 now;

	do {
		seq = read_seqcount_latch(&tkf->seq);
		/* The lowest bit of the sequence selects the readout base */
		tkr = tkf->base + (seq & 0x01);
		now = ktime_to_ns(tkr->base);
		now += timekeeping_get_ns(tkr);
	} while (read_seqcount_latch_retry(&tkf->seq, seq));

	return now;
}

/**
 * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
 *
 * This
timestamp is not guaranteed to be monotonic across an update. 513 * The timestamp is calculated by: 514 * 515 * now = base_mono + clock_delta * slope 516 * 517 * So if the update lowers the slope, readers who are forced to the 518 * not yet updated second array are still using the old steeper slope. 519 * 520 * tmono 521 * ^ 522 * | o n 523 * | o n 524 * | u 525 * | o 526 * |o 527 * |12345678---> reader order 528 * 529 * o = old slope 530 * u = update 531 * n = new slope 532 * 533 * So reader 6 will observe time going backwards versus reader 5. 534 * 535 * While other CPUs are likely to be able to observe that, the only way 536 * for a CPU local observation is when an NMI hits in the middle of 537 * the update. Timestamps taken from that NMI context might be ahead 538 * of the following timestamps. Callers need to be aware of that and 539 * deal with it. 540 */ 541 u64 notrace ktime_get_mono_fast_ns(void) 542 { 543 return __ktime_get_fast_ns(&tk_fast_mono); 544 } 545 EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); 546 547 /** 548 * ktime_get_raw_fast_ns - Fast NMI safe access to clock monotonic raw 549 * 550 * Contrary to ktime_get_mono_fast_ns() this is always correct because the 551 * conversion factor is not affected by NTP/PTP correction. 552 */ 553 u64 notrace ktime_get_raw_fast_ns(void) 554 { 555 return __ktime_get_fast_ns(&tk_fast_raw); 556 } 557 EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); 558 559 /** 560 * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock. 561 * 562 * To keep it NMI safe since we're accessing from tracing, we're not using a 563 * separate timekeeper with updates to monotonic clock and boot offset 564 * protected with seqcounts. This has the following minor side effects: 565 * 566 * (1) Its possible that a timestamp be taken after the boot offset is updated 567 * but before the timekeeper is updated. 
If this happens, the new boot offset 568 * is added to the old timekeeping making the clock appear to update slightly 569 * earlier: 570 * CPU 0 CPU 1 571 * timekeeping_inject_sleeptime64() 572 * __timekeeping_inject_sleeptime(tk, delta); 573 * timestamp(); 574 * timekeeping_update_staged(tkd, TK_CLEAR_NTP...); 575 * 576 * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be 577 * partially updated. Since the tk->offs_boot update is a rare event, this 578 * should be a rare occurrence which postprocessing should be able to handle. 579 * 580 * The caveats vs. timestamp ordering as documented for ktime_get_mono_fast_ns() 581 * apply as well. 582 */ 583 u64 notrace ktime_get_boot_fast_ns(void) 584 { 585 struct timekeeper *tk = &tk_core.timekeeper; 586 587 return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot))); 588 } 589 EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); 590 591 /** 592 * ktime_get_tai_fast_ns - NMI safe and fast access to tai clock. 593 * 594 * The same limitations as described for ktime_get_boot_fast_ns() apply. The 595 * mono time and the TAI offset are not read atomically which may yield wrong 596 * readouts. However, an update of the TAI offset is an rare event e.g., caused 597 * by settime or adjtimex with an offset. The user of this function has to deal 598 * with the possibility of wrong timestamps in post processing. 599 */ 600 u64 notrace ktime_get_tai_fast_ns(void) 601 { 602 struct timekeeper *tk = &tk_core.timekeeper; 603 604 return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai))); 605 } 606 EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns); 607 608 /** 609 * ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime. 610 * 611 * See ktime_get_mono_fast_ns() for documentation of the time stamp ordering. 
612 */ 613 u64 ktime_get_real_fast_ns(void) 614 { 615 struct tk_fast *tkf = &tk_fast_mono; 616 struct tk_read_base *tkr; 617 u64 baser, delta; 618 unsigned int seq; 619 620 do { 621 seq = raw_read_seqcount_latch(&tkf->seq); 622 tkr = tkf->base + (seq & 0x01); 623 baser = ktime_to_ns(tkr->base_real); 624 delta = timekeeping_get_ns(tkr); 625 } while (raw_read_seqcount_latch_retry(&tkf->seq, seq)); 626 627 return baser + delta; 628 } 629 EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns); 630 631 /** 632 * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource. 633 * @tk: Timekeeper to snapshot. 634 * 635 * It generally is unsafe to access the clocksource after timekeeping has been 636 * suspended, so take a snapshot of the readout base of @tk and use it as the 637 * fast timekeeper's readout base while suspended. It will return the same 638 * number of cycles every time until timekeeping is resumed at which time the 639 * proper readout base for the fast timekeeper will be restored automatically. 
*/
static void halt_fast_timekeeper(const struct timekeeper *tk)
{
	static struct tk_read_base tkr_dummy;
	const struct tk_read_base *tkr = &tk->tkr_mono;

	/* Monotonic: snapshot the readout base and freeze the cycle value */
	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
	cycles_at_suspend = tk_clock_read(tkr);
	/* From now on readers go through dummy_clock instead of the clocksource */
	tkr_dummy.clock = &dummy_clock;
	tkr_dummy.base_real = tkr->base + tk->offs_real;
	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);

	/* Raw: same snapshot procedure, reusing cycles_at_suspend */
	tkr = &tk->tkr_raw;
	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
	tkr_dummy.clock = &dummy_clock;
	update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
}

static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);

/*
 * Invoke the pvclock_gtod notifier chain with @tk as data and @was_set as
 * the notifier action value.
 */
static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
{
	raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk);
}

/**
 * pvclock_gtod_register_notifier - register a pvclock timedata update listener
 * @nb: Pointer to the notifier block to register
 *
 * The chain is invoked once right after the registration, with the core
 * timekeeper and was_set == true.
 *
 * Returns: The result of raw_notifier_chain_register()
 */
int pvclock_gtod_register_notifier(struct notifier_block *nb)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	int ret;

	/* Registration and initial notification happen under the core lock */
	guard(raw_spinlock_irqsave)(&tk_core.lock);
	ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
	update_pvclock_gtod(tk, true);

	return ret;
}
EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);

/**
 * pvclock_gtod_unregister_notifier - unregister a pvclock
 * timedata update listener
 * @nb: Pointer to the notifier block to unregister
 *
 * Returns: The result of raw_notifier_chain_unregister()
 */
int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
{
	guard(raw_spinlock_irqsave)(&tk_core.lock);
	return raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
}
EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);

/*
 * tk_update_leap_state - helper to update the next_leap_ktime
 *
 * Fetches the next leap time for the ID of @tk from NTP. Unless that is
 * KTIME_MAX, offs_real is subtracted to convert it to monotonic time.
 */
static inline void tk_update_leap_state(struct timekeeper *tk)
{
	tk->next_leap_ktime = ntp_get_next_leap(tk->id);
	if (tk->next_leap_ktime != KTIME_MAX)
		/* Convert to monotonic time */
		tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
}

/*
 * Leap state update for both shadow and the real timekeeper
 * Separate to spare a full memcpy() of the timekeeper.
 */
static void tk_update_leap_state_all(struct tk_data *tkd)
{
	write_seqcount_begin(&tkd->seq);
	tk_update_leap_state(&tkd->shadow_timekeeper);
	/* Only the leap time is propagated to the real timekeeper */
	tkd->timekeeper.next_leap_ktime = tkd->shadow_timekeeper.next_leap_ktime;
	write_seqcount_end(&tkd->seq);
}

/*
 * Update the ktime_t based scalar nsec members of the timekeeper
 */
static inline void tk_update_ktime_data(struct timekeeper *tk)
{
	u64 seconds;
	u32 nsec;

	/*
	 * The xtime based monotonic readout is:
	 *	nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now();
	 * The ktime based monotonic readout is:
	 *	nsec = base_mono + now();
	 * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec
	 */
	seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
	nsec = (u32) tk->wall_to_monotonic.tv_nsec;
	tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);

	/*
	 * The sum of the nanoseconds portions of xtime and
	 * wall_to_monotonic can be greater/equal one second. Take
	 * this into account before updating tk->ktime_sec.
	 */
	nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
	if (nsec >= NSEC_PER_SEC)
		seconds++;
	tk->ktime_sec = seconds;

	/* Update the monotonic raw base */
	tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
}

/*
 * Recalculate the nanoseconds to cycles conversion data of @tks.
 *
 * @tks is the timekeeper which is updated, @tkc the one it is compared
 * against. Nothing is done when coupled clockevents are not configured, when
 * the clocksource of @tks lacks CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT or when
 * mult and shift of both monotonic readout bases are identical.
 */
static inline void tk_update_ns_to_cyc(struct timekeeper *tks, struct timekeeper *tkc)
{
	struct tk_read_base *tkrs = &tks->tkr_mono;
	struct tk_read_base *tkrc = &tkc->tkr_mono;
	unsigned int shift;

	if (!IS_ENABLED(CONFIG_GENERIC_CLOCKEVENTS_COUPLED) ||
	    !(tkrs->clock->flags & CLOCK_SOURCE_HAS_COUPLED_CLOCK_EVENT))
		return;

	/* Quick check: unchanged mult/shift means unchanged conversion data */
	if (tkrs->mult == tkrc->mult && tkrs->shift == tkrc->shift)
		return;
	/*
	 * The conversion math is simple:
	 *
	 *      CS::MULT         (1 << NS_TO_CYC_SHIFT)
	 *   ----------------  = ----------------------
	 *   (1 << CS::SHIFT)        NS_TO_CYC_MULT
	 *
	 * Ergo:
	 *
	 *   NS_TO_CYC_MULT = (1 << (CS::SHIFT + NS_TO_CYC_SHIFT)) / CS::MULT
	 *
	 * NS_TO_CYC_SHIFT has been set up in tk_setup_internals()
	 */
	shift = tkrs->shift + tks->cs_ns_to_cyc_shift;
	tks->cs_ns_to_cyc_mult = (u32)div_u64(1ULL << shift, tkrs->mult);
	tks->cs_ns_to_cyc_maxns = div_u64(tkrs->clock->mask, tks->cs_ns_to_cyc_mult);
}

/*
 * Restore the shadow timekeeper from the real timekeeper.
 */
static void timekeeping_restore_shadow(struct tk_data *tkd)
{
	lockdep_assert_held(&tkd->lock);
	memcpy(&tkd->shadow_timekeeper, &tkd->timekeeper, sizeof(tkd->timekeeper));
}

/*
 * Finalize the shadow timekeeper of @tkd and copy it over the real
 * timekeeper, with readers blocked out via the sequence count.
 *
 * @action is a mask of TK_CLEAR_NTP and TK_CLOCK_WAS_SET. The caller must
 * hold tkd->lock.
 */
static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int action)
{
	struct timekeeper *tk = &tkd->shadow_timekeeper;

	lockdep_assert_held(&tkd->lock);

	/*
	 * Block out readers before running the updates below because that
	 * updates VDSO and other time related infrastructure. Not blocking
	 * the readers might let a reader see time going backwards when
	 * reading from the VDSO after the VDSO update and then reading in
	 * the kernel from the timekeeper before that got updated.
	 */
	write_seqcount_begin(&tkd->seq);

	if (action & TK_CLEAR_NTP) {
		tk->ntp_error = 0;
		ntp_clear(tk->id);
	}

	tk_update_leap_state(tk);
	tk_update_ktime_data(tk);
	tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real;

	if (tk->id == TIMEKEEPER_CORE) {
		/* Compared against the real timekeeper, which is not yet overwritten */
		tk_update_ns_to_cyc(tk, &tkd->timekeeper);
		update_vsyscall(tk);
		update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);

		update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
		update_fast_timekeeper(&tk->tkr_raw,  &tk_fast_raw);
	} else if (tk_is_aux(tk)) {
		vdso_time_update_aux(tk);
	}

	if (action & TK_CLOCK_WAS_SET)
		tk->clock_was_set_seq++;

	/*
	 * Update the real timekeeper.
	 *
	 * We could avoid this memcpy() by switching pointers, but that has
	 * the downside that the reader side no longer benefits from
	 * the cacheline optimized data layout of the timekeeper and requires
	 * another indirection.
	 */
	memcpy(&tkd->timekeeper, tk, sizeof(*tk));
	write_seqcount_end(&tkd->seq);
}

/**
 * timekeeping_forward_now - update clock to the current time
 * @tk:		Pointer to the timekeeper to update
 *
 * Forward the current clock to update its state since the last call to
 * update_wall_time(). This is useful before significant clock changes,
 * as it avoids having to deal with this time offset explicitly.
846 */ 847 static void timekeeping_forward_now(struct timekeeper *tk) 848 { 849 u64 cycle_now, delta; 850 851 cycle_now = tk_clock_read(&tk->tkr_mono); 852 delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask, 853 tk->tkr_mono.clock->max_raw_delta); 854 tk->tkr_mono.cycle_last = cycle_now; 855 tk->tkr_raw.cycle_last = cycle_now; 856 857 while (delta > 0) { 858 u64 max = tk->tkr_mono.clock->max_cycles; 859 u64 incr = delta < max ? delta : max; 860 861 tk->tkr_mono.xtime_nsec += incr * tk->tkr_mono.mult; 862 tk->tkr_raw.xtime_nsec += incr * tk->tkr_raw.mult; 863 tk_normalize_xtime(tk); 864 delta -= incr; 865 } 866 tk_update_coarse_nsecs(tk); 867 } 868 869 /* 870 * ktime_expiry_to_cycles - Convert a expiry time to clocksource cycles 871 * @id: Clocksource ID which is required for validity 872 * @expires_ns: Absolute CLOCK_MONOTONIC expiry time (nsecs) to be converted 873 * @cycles: Pointer to storage for corresponding absolute cycles value 874 * 875 * Convert a CLOCK_MONOTONIC based absolute expiry time to a cycles value 876 * based on the correlated clocksource of the clockevent device by using 877 * the base nanoseconds and cycles values of the last timekeeper update and 878 * converting the delta between @expires_ns and base nanoseconds to cycles. 879 * 880 * This only works for clockevent devices which are using a less than or 881 * equal comparator against the clocksource. 882 * 883 * Utilizing this avoids two clocksource reads for such devices, the 884 * ktime_get() in clockevents_program_event() to calculate the delta expiry 885 * value and the readout in the device::set_next_event() callback to 886 * convert the delta back to a absolute comparator value. 
887 * 888 * Returns: True if @id matches the current clocksource ID, false otherwise 889 */ 890 bool ktime_expiry_to_cycles(enum clocksource_ids id, ktime_t expires_ns, u64 *cycles) 891 { 892 struct timekeeper *tk = &tk_core.timekeeper; 893 struct tk_read_base *tkrm = &tk->tkr_mono; 894 ktime_t base_ns, delta_ns, max_ns; 895 u64 base_cycles, delta_cycles; 896 unsigned int seq; 897 u32 mult, shift; 898 899 /* 900 * Racy check to avoid the seqcount overhead when ID does not match. If 901 * the relevant clocksource is installed concurrently, then this will 902 * just delay the switch over to this mechanism until the next event is 903 * programmed. If the ID is not matching the clock events code will use 904 * the regular relative set_next_event() callback as before. 905 */ 906 if (data_race(tk->cs_id) != id) 907 return false; 908 909 do { 910 seq = read_seqcount_begin(&tk_core.seq); 911 912 if (tk->cs_id != id) 913 return false; 914 915 base_cycles = tkrm->cycle_last; 916 base_ns = tkrm->base + (tkrm->xtime_nsec >> tkrm->shift); 917 918 mult = tk->cs_ns_to_cyc_mult; 919 shift = tk->cs_ns_to_cyc_shift; 920 max_ns = tk->cs_ns_to_cyc_maxns; 921 922 } while (read_seqcount_retry(&tk_core.seq, seq)); 923 924 /* Prevent negative deltas and multiplication overflows */ 925 delta_ns = min(expires_ns - base_ns, max_ns); 926 delta_ns = max(delta_ns, 0); 927 928 /* Convert to cycles */ 929 delta_cycles = ((u64)delta_ns * mult) >> shift; 930 *cycles = base_cycles + delta_cycles; 931 return true; 932 } 933 934 /** 935 * ktime_get_real_ts64 - Returns the time of day in a timespec64. 936 * @ts: pointer to the timespec to be set 937 * 938 * Returns the time of day in a timespec64 (WARN if suspended). 
939 */ 940 void ktime_get_real_ts64(struct timespec64 *ts) 941 { 942 struct timekeeper *tk = &tk_core.timekeeper; 943 unsigned int seq; 944 u64 nsecs; 945 946 WARN_ON(timekeeping_suspended); 947 948 do { 949 seq = read_seqcount_begin(&tk_core.seq); 950 951 ts->tv_sec = tk->xtime_sec; 952 nsecs = timekeeping_get_ns(&tk->tkr_mono); 953 954 } while (read_seqcount_retry(&tk_core.seq, seq)); 955 956 ts->tv_nsec = 0; 957 timespec64_add_ns(ts, nsecs); 958 } 959 EXPORT_SYMBOL(ktime_get_real_ts64); 960 961 ktime_t ktime_get(void) 962 { 963 struct timekeeper *tk = &tk_core.timekeeper; 964 unsigned int seq; 965 ktime_t base; 966 u64 nsecs; 967 968 WARN_ON(timekeeping_suspended); 969 970 do { 971 seq = read_seqcount_begin(&tk_core.seq); 972 base = tk->tkr_mono.base; 973 nsecs = timekeeping_get_ns(&tk->tkr_mono); 974 975 } while (read_seqcount_retry(&tk_core.seq, seq)); 976 977 return ktime_add_ns(base, nsecs); 978 } 979 EXPORT_SYMBOL_GPL(ktime_get); 980 981 u32 ktime_get_resolution_ns(void) 982 { 983 struct timekeeper *tk = &tk_core.timekeeper; 984 unsigned int seq; 985 u32 nsecs; 986 987 WARN_ON(timekeeping_suspended); 988 989 do { 990 seq = read_seqcount_begin(&tk_core.seq); 991 nsecs = tk->tkr_mono.mult >> tk->tkr_mono.shift; 992 } while (read_seqcount_retry(&tk_core.seq, seq)); 993 994 return nsecs; 995 } 996 EXPORT_SYMBOL_GPL(ktime_get_resolution_ns); 997 998 static const ktime_t *const offsets[TK_OFFS_MAX] = { 999 [TK_OFFS_REAL] = &tk_core.timekeeper.offs_real, 1000 [TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot, 1001 [TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai, 1002 }; 1003 1004 ktime_t ktime_get_with_offset(enum tk_offsets offs) 1005 { 1006 struct timekeeper *tk = &tk_core.timekeeper; 1007 const ktime_t *offset = offsets[offs]; 1008 unsigned int seq; 1009 ktime_t base; 1010 u64 nsecs; 1011 1012 WARN_ON(timekeeping_suspended); 1013 1014 do { 1015 seq = read_seqcount_begin(&tk_core.seq); 1016 base = ktime_add(tk->tkr_mono.base, *offset); 1017 nsecs = 
timekeeping_get_ns(&tk->tkr_mono); 1018 1019 } while (read_seqcount_retry(&tk_core.seq, seq)); 1020 1021 return ktime_add_ns(base, nsecs); 1022 1023 } 1024 EXPORT_SYMBOL_GPL(ktime_get_with_offset); 1025 1026 ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs) 1027 { 1028 struct timekeeper *tk = &tk_core.timekeeper; 1029 const ktime_t *offset = offsets[offs]; 1030 unsigned int seq; 1031 ktime_t base; 1032 u64 nsecs; 1033 1034 WARN_ON(timekeeping_suspended); 1035 1036 do { 1037 seq = read_seqcount_begin(&tk_core.seq); 1038 base = ktime_add(tk->tkr_mono.base, *offset); 1039 nsecs = tk->coarse_nsec; 1040 1041 } while (read_seqcount_retry(&tk_core.seq, seq)); 1042 1043 return ktime_add_ns(base, nsecs); 1044 } 1045 EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset); 1046 1047 /** 1048 * ktime_mono_to_any() - convert monotonic time to any other time 1049 * @tmono: time to convert. 1050 * @offs: which offset to use 1051 */ 1052 ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs) 1053 { 1054 const ktime_t *offset = offsets[offs]; 1055 unsigned int seq; 1056 ktime_t tconv; 1057 1058 if (IS_ENABLED(CONFIG_64BIT)) { 1059 /* 1060 * Paired with WRITE_ONCE()s in tk_set_wall_to_mono() and 1061 * tk_update_sleep_time(). 
1062 */ 1063 return ktime_add(tmono, READ_ONCE(*offset)); 1064 } 1065 1066 do { 1067 seq = read_seqcount_begin(&tk_core.seq); 1068 tconv = ktime_add(tmono, *offset); 1069 } while (read_seqcount_retry(&tk_core.seq, seq)); 1070 1071 return tconv; 1072 } 1073 EXPORT_SYMBOL_GPL(ktime_mono_to_any); 1074 1075 /** 1076 * ktime_get_raw - Returns the raw monotonic time in ktime_t format 1077 */ 1078 ktime_t ktime_get_raw(void) 1079 { 1080 struct timekeeper *tk = &tk_core.timekeeper; 1081 unsigned int seq; 1082 ktime_t base; 1083 u64 nsecs; 1084 1085 do { 1086 seq = read_seqcount_begin(&tk_core.seq); 1087 base = tk->tkr_raw.base; 1088 nsecs = timekeeping_get_ns(&tk->tkr_raw); 1089 1090 } while (read_seqcount_retry(&tk_core.seq, seq)); 1091 1092 return ktime_add_ns(base, nsecs); 1093 } 1094 EXPORT_SYMBOL_GPL(ktime_get_raw); 1095 1096 /** 1097 * ktime_get_ts64 - get the monotonic clock in timespec64 format 1098 * @ts: pointer to timespec variable 1099 * 1100 * The function calculates the monotonic clock from the realtime 1101 * clock and the wall_to_monotonic offset and stores the result 1102 * in normalized timespec64 format in the variable pointed to by @ts. 1103 */ 1104 void ktime_get_ts64(struct timespec64 *ts) 1105 { 1106 struct timekeeper *tk = &tk_core.timekeeper; 1107 struct timespec64 tomono; 1108 unsigned int seq; 1109 u64 nsec; 1110 1111 WARN_ON(timekeeping_suspended); 1112 1113 do { 1114 seq = read_seqcount_begin(&tk_core.seq); 1115 ts->tv_sec = tk->xtime_sec; 1116 nsec = timekeeping_get_ns(&tk->tkr_mono); 1117 tomono = tk->wall_to_monotonic; 1118 1119 } while (read_seqcount_retry(&tk_core.seq, seq)); 1120 1121 ts->tv_sec += tomono.tv_sec; 1122 ts->tv_nsec = 0; 1123 timespec64_add_ns(ts, nsec + tomono.tv_nsec); 1124 } 1125 EXPORT_SYMBOL_GPL(ktime_get_ts64); 1126 1127 /** 1128 * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC 1129 * 1130 * Returns the seconds portion of CLOCK_MONOTONIC with a single non 1131 * serialized read. 
tk->ktime_sec is of type 'unsigned long' so this 1132 * works on both 32 and 64 bit systems. On 32 bit systems the readout 1133 * covers ~136 years of uptime which should be enough to prevent 1134 * premature wrap arounds. 1135 */ 1136 time64_t ktime_get_seconds(void) 1137 { 1138 struct timekeeper *tk = &tk_core.timekeeper; 1139 1140 WARN_ON(timekeeping_suspended); 1141 return tk->ktime_sec; 1142 } 1143 EXPORT_SYMBOL_GPL(ktime_get_seconds); 1144 1145 /** 1146 * ktime_get_real_seconds - Get the seconds portion of CLOCK_REALTIME 1147 * 1148 * Returns the wall clock seconds since 1970. 1149 * 1150 * For 64bit systems the fast access to tk->xtime_sec is preserved. On 1151 * 32bit systems the access must be protected with the sequence 1152 * counter to provide "atomic" access to the 64bit tk->xtime_sec 1153 * value. 1154 */ 1155 time64_t ktime_get_real_seconds(void) 1156 { 1157 struct timekeeper *tk = &tk_core.timekeeper; 1158 time64_t seconds; 1159 unsigned int seq; 1160 1161 if (IS_ENABLED(CONFIG_64BIT)) 1162 return tk->xtime_sec; 1163 1164 do { 1165 seq = read_seqcount_begin(&tk_core.seq); 1166 seconds = tk->xtime_sec; 1167 1168 } while (read_seqcount_retry(&tk_core.seq, seq)); 1169 1170 return seconds; 1171 } 1172 EXPORT_SYMBOL_GPL(ktime_get_real_seconds); 1173 1174 /** 1175 * __ktime_get_real_seconds - Unprotected access to CLOCK_REALTIME seconds 1176 * 1177 * The same as ktime_get_real_seconds() but without the sequence counter 1178 * protection. This function is used in restricted contexts like the x86 MCE 1179 * handler and in KGDB. It's unprotected on 32-bit vs. concurrent half 1180 * completed modification and only to be used for such critical contexts. 
1181 * 1182 * Returns: Racy snapshot of the CLOCK_REALTIME seconds value 1183 */ 1184 noinstr time64_t __ktime_get_real_seconds(void) 1185 { 1186 struct timekeeper *tk = &tk_core.timekeeper; 1187 1188 return tk->xtime_sec; 1189 } 1190 1191 static inline u64 tk_clock_read_snapshot(const struct tk_read_base *tkr, 1192 struct clocksource_hw_snapshot *chs) 1193 { 1194 struct clocksource *clock = READ_ONCE(tkr->clock); 1195 1196 if (unlikely(clock->read_snapshot)) 1197 return clock->read_snapshot(clock, chs); 1198 1199 return clock->read(clock); 1200 } 1201 1202 1203 /** 1204 * ktime_get_snapshot_id - Simultaneously snapshot a given clock ID with 1205 * CLOCK_MONOTONIC_RAW and the underlying 1206 * clocksource counter value. 1207 * @clock_id: The clock ID to snapshot 1208 * @systime_snapshot: Pointer to struct receiving the system time snapshot 1209 */ 1210 void ktime_get_snapshot_id(clockid_t clock_id, struct system_time_snapshot *systime_snapshot) 1211 { 1212 ktime_t base_raw, base_sys, offs_sys, *offs, offs_zero = 0; 1213 u64 nsec_raw, nsec_sys, now; 1214 struct timekeeper *tk; 1215 struct tk_data *tkd; 1216 unsigned int seq; 1217 1218 /* Invalidate the snapshot for all failure cases */ 1219 systime_snapshot->valid = false; 1220 1221 if (WARN_ON_ONCE(timekeeping_suspended)) 1222 return; 1223 1224 switch (clock_id) { 1225 case CLOCK_REALTIME: 1226 tkd = &tk_core; 1227 offs = &tk_core.timekeeper.offs_real; 1228 break; 1229 /* Map RAW to MONOTONIC so the loop below is trivial */ 1230 case CLOCK_MONOTONIC_RAW: 1231 case CLOCK_MONOTONIC: 1232 tkd = &tk_core; 1233 offs = &offs_zero; 1234 break; 1235 case CLOCK_BOOTTIME: 1236 tkd = &tk_core; 1237 offs = &tk_core.timekeeper.offs_boot; 1238 break; 1239 case CLOCK_AUX ... 
CLOCK_AUX_LAST: 1240 tkd = aux_get_tk_data(clock_id); 1241 if (!tkd) 1242 return; 1243 offs = &tkd->timekeeper.offs_aux; 1244 break; 1245 default: 1246 WARN_ON_ONCE(1); 1247 return; 1248 } 1249 1250 tk = &tkd->timekeeper; 1251 1252 do { 1253 struct clocksource_hw_snapshot chs = { }; 1254 1255 seq = read_seqcount_begin(&tkd->seq); 1256 1257 /* Aux clocks can be invalid */ 1258 if (!tk->clock_valid) 1259 return; 1260 1261 now = tk_clock_read_snapshot(&tk->tkr_mono, &chs); 1262 systime_snapshot->cs_id = tk->tkr_mono.clock->id; 1263 1264 systime_snapshot->hw_cycles = chs.hw_cycles; 1265 systime_snapshot->hw_csid = chs.hw_csid; 1266 1267 systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq; 1268 systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq; 1269 1270 base_sys = tk->tkr_mono.base; 1271 offs_sys = *offs; 1272 base_raw = tk->tkr_raw.base; 1273 1274 nsec_sys = timekeeping_cycles_to_ns(&tk->tkr_mono, now); 1275 nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, now); 1276 } while (read_seqcount_retry(&tkd->seq, seq)); 1277 1278 systime_snapshot->cycles = now; 1279 systime_snapshot->systime = ktime_add_ns(base_sys, offs_sys + nsec_sys); 1280 systime_snapshot->monoraw = ktime_add_ns(base_raw, nsec_raw); 1281 1282 /* 1283 * Special case for PTP. Just transfer the raw time into sys, 1284 * so the call sites can consistently use snap::systime. 
1285 */ 1286 if (clock_id == CLOCK_MONOTONIC_RAW) 1287 systime_snapshot->systime = systime_snapshot->monoraw; 1288 /* Tell the consumer that this snapshot is valid */ 1289 systime_snapshot->valid = true; 1290 } 1291 EXPORT_SYMBOL_GPL(ktime_get_snapshot_id); 1292 1293 /* Scale base by mult/div checking for overflow */ 1294 static int scale64_check_overflow(u64 mult, u64 div, u64 *base) 1295 { 1296 u64 tmp, rem; 1297 1298 tmp = div64_u64_rem(*base, div, &rem); 1299 1300 if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) || 1301 ((int)sizeof(u64)*8 - fls64(mult) < fls64(rem))) 1302 return -EOVERFLOW; 1303 tmp *= mult; 1304 1305 rem = div64_u64(rem * mult, div); 1306 *base = tmp + rem; 1307 return 0; 1308 } 1309 1310 /** 1311 * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval 1312 * @history: Snapshot representing start of history 1313 * @partial_history_cycles: Cycle offset into history (fractional part) 1314 * @total_history_cycles: Total history length in cycles 1315 * @discontinuity: True indicates clock was set on history period 1316 * @ts: Cross timestamp that should be adjusted using 1317 * partial/total ratio 1318 * 1319 * Helper function used by get_device_system_crosststamp() to correct the 1320 * crosstimestamp corresponding to the start of the current interval to the 1321 * system counter value (timestamp point) provided by the driver. The 1322 * total_history_* quantities are the total history starting at the provided 1323 * reference point and ending at the start of the current interval. The cycle 1324 * count between the driver timestamp point and the start of the current 1325 * interval is partial_history_cycles. 
1326 */ 1327 static int adjust_historical_crosststamp(struct system_time_snapshot *history, 1328 u64 partial_history_cycles, 1329 u64 total_history_cycles, 1330 bool discontinuity, 1331 struct system_device_crosststamp *ts) 1332 { 1333 struct timekeeper *tk = &tk_core.timekeeper; 1334 u64 corr_raw, corr_sys; 1335 bool interp_forward; 1336 int ret; 1337 1338 if (total_history_cycles == 0 || partial_history_cycles == 0) 1339 return 0; 1340 1341 /* Interpolate shortest distance from beginning or end of history */ 1342 interp_forward = partial_history_cycles > total_history_cycles / 2; 1343 partial_history_cycles = interp_forward ? 1344 total_history_cycles - partial_history_cycles : 1345 partial_history_cycles; 1346 1347 /* 1348 * Scale the monotonic raw time delta by: 1349 * partial_history_cycles / total_history_cycles 1350 */ 1351 corr_raw = (u64)ktime_to_ns(ktime_sub(ts->sys_monoraw, history->monoraw)); 1352 ret = scale64_check_overflow(partial_history_cycles, 1353 total_history_cycles, &corr_raw); 1354 if (ret) 1355 return ret; 1356 1357 /* 1358 * If there is a discontinuity in the history, scale monotonic raw 1359 * correction by: 1360 * mult(sys)/mult(raw) yielding the system time correction 1361 * 1362 * Otherwise, calculate the system time correction similar to monotonic 1363 * raw calculation 1364 */ 1365 if (discontinuity) { 1366 corr_sys = mul_u64_u32_div(corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult); 1367 } else { 1368 corr_sys = (u64)ktime_to_ns(ktime_sub(ts->sys_systime, history->systime)); 1369 ret = scale64_check_overflow(partial_history_cycles, total_history_cycles, 1370 &corr_sys); 1371 if (ret) 1372 return ret; 1373 } 1374 1375 /* Fixup monotonic raw and system time time values */ 1376 if (interp_forward) { 1377 ts->sys_monoraw = ktime_add_ns(history->monoraw, corr_raw); 1378 ts->sys_systime = ktime_add_ns(history->systime, corr_sys); 1379 } else { 1380 ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw); 1381 ts->sys_systime = 
ktime_sub_ns(ts->sys_systime, corr_sys); 1382 } 1383 1384 return 0; 1385 } 1386 1387 /* 1388 * timestamp_in_interval - true if ts is chronologically in [start, end] 1389 * 1390 * True if ts occurs chronologically at or after start, and before or at end. 1391 */ 1392 static bool timestamp_in_interval(u64 start, u64 end, u64 ts) 1393 { 1394 if (ts >= start && ts <= end) 1395 return true; 1396 if (start > end && (ts >= start || ts <= end)) 1397 return true; 1398 return false; 1399 } 1400 1401 static bool convert_clock(u64 *val, u32 numerator, u32 denominator) 1402 { 1403 u64 rem, res; 1404 1405 if (!numerator || !denominator) 1406 return false; 1407 1408 res = div64_u64_rem(*val, denominator, &rem) * numerator; 1409 *val = res + div_u64(rem * numerator, denominator); 1410 return true; 1411 } 1412 1413 static bool convert_base_to_cs(struct system_counterval_t *scv) 1414 { 1415 struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock; 1416 struct clocksource_base *base; 1417 u32 num, den; 1418 1419 /* The timestamp was taken from the time keeper clock source */ 1420 if (cs->id == scv->cs_id) 1421 return true; 1422 1423 /* 1424 * Check whether cs_id matches the base clock. Prevent the compiler from 1425 * re-evaluating @base as the clocksource might change concurrently. 1426 */ 1427 base = READ_ONCE(cs->base); 1428 if (!base || base->id != scv->cs_id) 1429 return false; 1430 1431 num = scv->use_nsecs ? cs->freq_khz : base->numerator; 1432 den = scv->use_nsecs ? 
USEC_PER_SEC : base->denominator; 1433 1434 if (!convert_clock(&scv->cycles, num, den)) 1435 return false; 1436 1437 scv->cycles += base->offset; 1438 /* Set the clocksource ID as scv::cycles is now clocksource based */ 1439 scv->cs_id = cs->id; 1440 return true; 1441 } 1442 1443 static bool convert_cs_to_base(u64 *cycles, enum clocksource_ids base_id) 1444 { 1445 struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock; 1446 struct clocksource_base *base; 1447 1448 /* 1449 * Check whether base_id matches the base clock. Prevent the compiler from 1450 * re-evaluating @base as the clocksource might change concurrently. 1451 */ 1452 base = READ_ONCE(cs->base); 1453 if (!base || base->id != base_id) 1454 return false; 1455 1456 *cycles -= base->offset; 1457 if (!convert_clock(cycles, base->denominator, base->numerator)) 1458 return false; 1459 return true; 1460 } 1461 1462 static bool convert_ns_to_cs(u64 *delta) 1463 { 1464 struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono; 1465 1466 if (BITS_TO_BYTES(fls64(*delta) + tkr->shift) >= sizeof(*delta)) 1467 return false; 1468 1469 *delta = div_u64((*delta << tkr->shift) - tkr->xtime_nsec, tkr->mult); 1470 return true; 1471 } 1472 1473 /** 1474 * ktime_real_to_base_clock() - Convert CLOCK_REALTIME timestamp to a base clock timestamp 1475 * @treal: CLOCK_REALTIME timestamp to convert 1476 * @base_id: base clocksource id 1477 * @cycles: pointer to store the converted base clock timestamp 1478 * 1479 * Converts a supplied, future realtime clock value to the corresponding base clock value. 1480 * 1481 * Return: true if the conversion is successful, false otherwise. 
1482 */ 1483 bool ktime_real_to_base_clock(ktime_t treal, enum clocksource_ids base_id, u64 *cycles) 1484 { 1485 struct timekeeper *tk = &tk_core.timekeeper; 1486 unsigned int seq; 1487 u64 delta; 1488 1489 do { 1490 seq = read_seqcount_begin(&tk_core.seq); 1491 if ((u64)treal < tk->tkr_mono.base_real) 1492 return false; 1493 delta = (u64)treal - tk->tkr_mono.base_real; 1494 if (!convert_ns_to_cs(&delta)) 1495 return false; 1496 *cycles = tk->tkr_mono.cycle_last + delta; 1497 if (!convert_cs_to_base(cycles, base_id)) 1498 return false; 1499 } while (read_seqcount_retry(&tk_core.seq, seq)); 1500 1501 return true; 1502 } 1503 EXPORT_SYMBOL_GPL(ktime_real_to_base_clock); 1504 1505 /** 1506 * get_device_system_crosststamp - Synchronously capture system/device timestamp 1507 * @get_time_fn: Callback to get simultaneous device time and system counter 1508 * from the device driver 1509 * @ctx: Context passed to get_time_fn() 1510 * @history_begin: Historical reference point used to interpolate system time when 1511 * the counter value provided by the driver is before the current interval 1512 * @xtstamp: Receives simultaneously captured system and device time 1513 * 1514 * Reads a timestamp from a device and correlates it to system time 1515 */ 1516 int get_device_system_crosststamp(int (*get_time_fn) 1517 (ktime_t *device_time, 1518 struct system_counterval_t *sys_counterval, 1519 void *ctx), 1520 void *ctx, 1521 struct system_time_snapshot *history_begin, 1522 struct system_device_crosststamp *xtstamp) 1523 { 1524 u64 syscnt_cycles, cycles, now, interval_start; 1525 unsigned int seq, clock_was_set_seq = 0; 1526 ktime_t base_sys, base_raw, *offs; 1527 u64 nsec_sys, nsec_raw; 1528 u8 cs_was_changed_seq; 1529 bool do_interp; 1530 struct timekeeper *tk; 1531 struct tk_data *tkd; 1532 int ret; 1533 1534 switch (xtstamp->clock_id) { 1535 case CLOCK_REALTIME: 1536 tkd = &tk_core; 1537 offs = &tk_core.timekeeper.offs_real; 1538 break; 1539 case CLOCK_AUX ... 
CLOCK_AUX_LAST: 1540 tkd = aux_get_tk_data(xtstamp->clock_id); 1541 if (!tkd) 1542 return -ENODEV; 1543 offs = &tkd->timekeeper.offs_aux; 1544 break; 1545 default: 1546 WARN_ON_ONCE(1); 1547 return -ENODEV; 1548 } 1549 1550 tk = &tkd->timekeeper; 1551 1552 do { 1553 seq = read_seqcount_begin(&tkd->seq); 1554 /* 1555 * Try to synchronously capture device time and a system 1556 * counter value calling back into the device driver 1557 */ 1558 ret = get_time_fn(&xtstamp->device, &xtstamp->sys_counter, ctx); 1559 if (ret) 1560 return ret; 1561 1562 /* 1563 * Verify that the clocksource ID associated with the captured 1564 * system counter value is the same as for the currently 1565 * installed timekeeper clocksource and convert to it. 1566 */ 1567 if (xtstamp->sys_counter.cs_id == CSID_GENERIC || 1568 !convert_base_to_cs(&xtstamp->sys_counter)) 1569 return -ENODEV; 1570 1571 cycles = syscnt_cycles = xtstamp->sys_counter.cycles; 1572 1573 /* 1574 * Check whether the system counter value provided by the 1575 * device driver is on the current timekeeping interval. 
1576 */ 1577 now = tk_clock_read(&tk->tkr_mono); 1578 interval_start = tk->tkr_mono.cycle_last; 1579 if (!timestamp_in_interval(interval_start, now, cycles)) { 1580 clock_was_set_seq = tk->clock_was_set_seq; 1581 cs_was_changed_seq = tk->cs_was_changed_seq; 1582 cycles = interval_start; 1583 do_interp = true; 1584 } else { 1585 do_interp = false; 1586 } 1587 1588 base_sys = ktime_add(tk->tkr_mono.base, *offs); 1589 base_raw = tk->tkr_raw.base; 1590 1591 nsec_sys = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles); 1592 nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, cycles); 1593 } while (read_seqcount_retry(&tkd->seq, seq)); 1594 1595 xtstamp->sys_systime = ktime_add_ns(base_sys, nsec_sys); 1596 xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw); 1597 1598 /* 1599 * Interpolate if necessary, adjusting back from the start of the 1600 * current interval 1601 */ 1602 if (do_interp) { 1603 u64 partial_history_cycles, total_history_cycles; 1604 bool discontinuity; 1605 1606 /* 1607 * Check that the counter value is not before the provided 1608 * history reference and that the history doesn't cross a 1609 * clocksource change 1610 */ 1611 if (!history_begin || 1612 !timestamp_in_interval(history_begin->cycles, cycles, syscnt_cycles) || 1613 history_begin->cs_was_changed_seq != cs_was_changed_seq) 1614 return -EINVAL; 1615 1616 partial_history_cycles = cycles - syscnt_cycles; 1617 total_history_cycles = cycles - history_begin->cycles; 1618 discontinuity = history_begin->clock_was_set_seq != clock_was_set_seq; 1619 1620 ret = adjust_historical_crosststamp(history_begin, partial_history_cycles, 1621 total_history_cycles, discontinuity, xtstamp); 1622 } 1623 1624 return ret; 1625 } 1626 EXPORT_SYMBOL_GPL(get_device_system_crosststamp); 1627 1628 /** 1629 * timekeeping_clocksource_has_base - Check whether the current clocksource 1630 * is based on given a base clock 1631 * @id: base clocksource ID 1632 * 1633 * Note: The return value is a snapshot which can become 
invalid right 1634 * after the function returns. 1635 * 1636 * Return: true if the timekeeper clocksource has a base clock with @id, 1637 * false otherwise 1638 */ 1639 bool timekeeping_clocksource_has_base(enum clocksource_ids id) 1640 { 1641 /* 1642 * This is a snapshot, so no point in using the sequence 1643 * count. Just prevent the compiler from re-evaluating @base as the 1644 * clocksource might change concurrently. 1645 */ 1646 struct clocksource_base *base = READ_ONCE(tk_core.timekeeper.tkr_mono.clock->base); 1647 1648 return base ? base->id == id : false; 1649 } 1650 EXPORT_SYMBOL_GPL(timekeeping_clocksource_has_base); 1651 1652 /** 1653 * do_settimeofday64 - Sets the time of day. 1654 * @ts: pointer to the timespec64 variable containing the new time 1655 * 1656 * Sets the time of day to the new time and update NTP and notify hrtimers 1657 */ 1658 int do_settimeofday64(const struct timespec64 *ts) 1659 { 1660 struct timespec64 ts_delta, xt; 1661 1662 if (!timespec64_valid_settod(ts)) 1663 return -EINVAL; 1664 1665 scoped_guard (raw_spinlock_irqsave, &tk_core.lock) { 1666 struct timekeeper *tks = &tk_core.shadow_timekeeper; 1667 1668 timekeeping_forward_now(tks); 1669 1670 xt = tk_xtime(tks); 1671 ts_delta = timespec64_sub(*ts, xt); 1672 1673 if (timespec64_compare(&tks->wall_to_monotonic, &ts_delta) > 0) { 1674 timekeeping_restore_shadow(&tk_core); 1675 return -EINVAL; 1676 } 1677 1678 tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, ts_delta)); 1679 tk_set_xtime(tks, ts); 1680 timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL); 1681 } 1682 1683 /* Signal hrtimers about time change */ 1684 clock_was_set(CLOCK_SET_WALL); 1685 1686 audit_tk_injoffset(ts_delta); 1687 add_device_randomness(ts, sizeof(*ts)); 1688 return 0; 1689 } 1690 EXPORT_SYMBOL(do_settimeofday64); 1691 1692 static inline bool timekeeper_is_core_tk(struct timekeeper *tk) 1693 { 1694 return !IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS) || tk->id == TIMEKEEPER_CORE; 1695 } 1696 1697 
/** 1698 * __timekeeping_inject_offset - Adds or subtracts from the current time. 1699 * @tkd: Pointer to the timekeeper to modify 1700 * @ts: Pointer to the timespec variable containing the offset 1701 * 1702 * Adds or subtracts an offset value from the current time. 1703 */ 1704 static int __timekeeping_inject_offset(struct tk_data *tkd, const struct timespec64 *ts) 1705 { 1706 struct timekeeper *tks = &tkd->shadow_timekeeper; 1707 struct timespec64 tmp; 1708 1709 if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC) 1710 return -EINVAL; 1711 1712 timekeeping_forward_now(tks); 1713 1714 if (timekeeper_is_core_tk(tks)) { 1715 /* Make sure the proposed value is valid */ 1716 tmp = timespec64_add(tk_xtime(tks), *ts); 1717 if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 || 1718 !timespec64_valid_settod(&tmp)) { 1719 timekeeping_restore_shadow(tkd); 1720 return -EINVAL; 1721 } 1722 1723 tk_xtime_add(tks, ts); 1724 tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, *ts)); 1725 } else { 1726 struct tk_read_base *tkr_mono = &tks->tkr_mono; 1727 ktime_t now, offs; 1728 1729 /* Get the current time */ 1730 now = ktime_add_ns(tkr_mono->base, timekeeping_get_ns(tkr_mono)); 1731 /* Add the relative offset change */ 1732 offs = ktime_add(tks->offs_aux, timespec64_to_ktime(*ts)); 1733 1734 /* Prevent that the resulting time becomes negative */ 1735 if (ktime_add(now, offs) < 0) { 1736 timekeeping_restore_shadow(tkd); 1737 return -EINVAL; 1738 } 1739 tk_update_aux_offs(tks, offs); 1740 } 1741 1742 timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL); 1743 return 0; 1744 } 1745 1746 static int timekeeping_inject_offset(const struct timespec64 *ts) 1747 { 1748 int ret; 1749 1750 scoped_guard (raw_spinlock_irqsave, &tk_core.lock) 1751 ret = __timekeeping_inject_offset(&tk_core, ts); 1752 1753 /* Signal hrtimers about time change */ 1754 if (!ret) 1755 clock_was_set(CLOCK_SET_WALL); 1756 return ret; 1757 } 1758 1759 /* 1760 * Indicates if there is an offset between 
the system clock and the hardware 1761 * clock/persistent clock/rtc. 1762 */ 1763 int persistent_clock_is_local; 1764 1765 /* 1766 * Adjust the time obtained from the CMOS to be UTC time instead of 1767 * local time. 1768 * 1769 * This is ugly, but preferable to the alternatives. Otherwise we 1770 * would either need to write a program to do it in /etc/rc (and risk 1771 * confusion if the program gets run more than once; it would also be 1772 * hard to make the program warp the clock precisely n hours) or 1773 * compile in the timezone information into the kernel. Bad, bad.... 1774 * 1775 * - TYT, 1992-01-01 1776 * 1777 * The best thing to do is to keep the CMOS clock in universal time (UTC) 1778 * as real UNIX machines always do it. This avoids all headaches about 1779 * daylight saving times and warping kernel clocks. 1780 */ 1781 void timekeeping_warp_clock(void) 1782 { 1783 if (sys_tz.tz_minuteswest != 0) { 1784 struct timespec64 adjust; 1785 1786 persistent_clock_is_local = 1; 1787 adjust.tv_sec = sys_tz.tz_minuteswest * 60; 1788 adjust.tv_nsec = 0; 1789 timekeeping_inject_offset(&adjust); 1790 } 1791 } 1792 1793 /* 1794 * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic 1795 */ 1796 static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) 1797 { 1798 tk->tai_offset = tai_offset; 1799 tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0)); 1800 } 1801 1802 /* 1803 * change_clocksource - Swaps clocksources if a new one is available 1804 * 1805 * Accumulates current time interval and initializes new clocksource 1806 */ 1807 static int change_clocksource(void *data) 1808 { 1809 struct clocksource *new = data, *old = NULL; 1810 1811 /* 1812 * If the clocksource is in a module, get a module reference. 1813 * Succeeds for built-in code (owner == NULL) as well. Abort if the 1814 * reference can't be acquired. 
1815 */ 1816 if (!try_module_get(new->owner)) 1817 return 0; 1818 1819 /* Abort if the device can't be enabled */ 1820 if (new->enable && new->enable(new) != 0) { 1821 module_put(new->owner); 1822 return 0; 1823 } 1824 1825 scoped_guard (raw_spinlock_irqsave, &tk_core.lock) { 1826 struct timekeeper *tks = &tk_core.shadow_timekeeper; 1827 1828 timekeeping_forward_now(tks); 1829 old = tks->tkr_mono.clock; 1830 tk_setup_internals(tks, new); 1831 timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL); 1832 } 1833 1834 tk_aux_update_clocksource(); 1835 1836 if (old) { 1837 if (old->disable) 1838 old->disable(old); 1839 module_put(old->owner); 1840 } 1841 1842 return 0; 1843 } 1844 1845 /** 1846 * timekeeping_notify - Install a new clock source 1847 * @clock: pointer to the clock source 1848 * 1849 * This function is called from clocksource.c after a new, better clock 1850 * source has been registered. The caller holds the clocksource_mutex. 1851 */ 1852 int timekeeping_notify(struct clocksource *clock) 1853 { 1854 struct timekeeper *tk = &tk_core.timekeeper; 1855 1856 if (tk->tkr_mono.clock == clock) 1857 return 0; 1858 1859 /* Disable inlined reads accross the clocksource switch */ 1860 clocksource_disable_inline_read(); 1861 1862 stop_machine(change_clocksource, clock, NULL); 1863 1864 /* 1865 * If the clocksource has been selected and supports inlined reads 1866 * enable the branch. 1867 */ 1868 if (tk->tkr_mono.clock == clock && clock->flags & CLOCK_SOURCE_CAN_INLINE_READ) 1869 clocksource_enable_inline_read(); 1870 1871 tick_clock_notify(); 1872 return tk->tkr_mono.clock == clock ? 
0 : -1; 1873 } 1874 1875 /** 1876 * ktime_get_raw_ts64 - Returns the raw monotonic time in a timespec 1877 * @ts: pointer to the timespec64 to be set 1878 * 1879 * Returns the raw monotonic time (completely un-modified by ntp) 1880 */ 1881 void ktime_get_raw_ts64(struct timespec64 *ts) 1882 { 1883 struct timekeeper *tk = &tk_core.timekeeper; 1884 unsigned int seq; 1885 u64 nsecs; 1886 1887 do { 1888 seq = read_seqcount_begin(&tk_core.seq); 1889 ts->tv_sec = tk->raw_sec; 1890 nsecs = timekeeping_get_ns(&tk->tkr_raw); 1891 1892 } while (read_seqcount_retry(&tk_core.seq, seq)); 1893 1894 ts->tv_nsec = 0; 1895 timespec64_add_ns(ts, nsecs); 1896 } 1897 EXPORT_SYMBOL(ktime_get_raw_ts64); 1898 1899 /** 1900 * ktime_get_clock_ts64 - Returns time of a clock in a timespec 1901 * @id: POSIX clock ID of the clock to read 1902 * @ts: Pointer to the timespec64 to be set 1903 * 1904 * The timestamp is invalidated (@ts->sec is set to -1) if the 1905 * clock @id is not available. 1906 */ 1907 void ktime_get_clock_ts64(clockid_t id, struct timespec64 *ts) 1908 { 1909 /* Invalidate time stamp */ 1910 ts->tv_sec = -1; 1911 ts->tv_nsec = 0; 1912 1913 switch (id) { 1914 case CLOCK_REALTIME: 1915 ktime_get_real_ts64(ts); 1916 return; 1917 case CLOCK_MONOTONIC: 1918 ktime_get_ts64(ts); 1919 return; 1920 case CLOCK_MONOTONIC_RAW: 1921 ktime_get_raw_ts64(ts); 1922 return; 1923 case CLOCK_AUX ... 
CLOCK_AUX_LAST: 1924 if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) 1925 ktime_get_aux_ts64(id, ts); 1926 return; 1927 default: 1928 WARN_ON_ONCE(1); 1929 } 1930 } 1931 EXPORT_SYMBOL_GPL(ktime_get_clock_ts64); 1932 1933 /** 1934 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres 1935 */ 1936 int timekeeping_valid_for_hres(void) 1937 { 1938 struct timekeeper *tk = &tk_core.timekeeper; 1939 unsigned int seq; 1940 int ret; 1941 1942 do { 1943 seq = read_seqcount_begin(&tk_core.seq); 1944 1945 ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; 1946 1947 } while (read_seqcount_retry(&tk_core.seq, seq)); 1948 1949 return ret; 1950 } 1951 1952 /** 1953 * timekeeping_max_deferment - Returns max time the clocksource can be deferred 1954 */ 1955 u64 timekeeping_max_deferment(void) 1956 { 1957 struct timekeeper *tk = &tk_core.timekeeper; 1958 unsigned int seq; 1959 u64 ret; 1960 1961 do { 1962 seq = read_seqcount_begin(&tk_core.seq); 1963 1964 ret = tk->tkr_mono.clock->max_idle_ns; 1965 1966 } while (read_seqcount_retry(&tk_core.seq, seq)); 1967 1968 return ret; 1969 } 1970 1971 /** 1972 * read_persistent_clock64 - Return time from the persistent clock. 1973 * @ts: Pointer to the storage for the readout value 1974 * 1975 * Weak dummy function for arches that do not yet support it. 1976 * Reads the time from the battery backed persistent clock. 1977 * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. 1978 * 1979 * XXX - Do be sure to remove it once all arches implement it. 1980 */ 1981 void __weak read_persistent_clock64(struct timespec64 *ts) 1982 { 1983 ts->tv_sec = 0; 1984 ts->tv_nsec = 0; 1985 } 1986 1987 /** 1988 * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset 1989 * from the boot. 1990 * @wall_time: current time as returned by persistent clock 1991 * @boot_offset: offset that is defined as wall_time - boot_time 1992 * 1993 * Weak dummy function for arches that do not yet support it. 
1994 * 1995 * The default function calculates offset based on the current value of 1996 * local_clock(). This way architectures that support sched_clock() but don't 1997 * support dedicated boot time clock will provide the best estimate of the 1998 * boot time. 1999 */ 2000 void __weak __init 2001 read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, 2002 struct timespec64 *boot_offset) 2003 { 2004 read_persistent_clock64(wall_time); 2005 *boot_offset = ns_to_timespec64(local_clock()); 2006 } 2007 2008 static __init void tkd_basic_setup(struct tk_data *tkd, enum timekeeper_ids tk_id, bool valid) 2009 { 2010 raw_spin_lock_init(&tkd->lock); 2011 seqcount_raw_spinlock_init(&tkd->seq, &tkd->lock); 2012 tkd->timekeeper.id = tkd->shadow_timekeeper.id = tk_id; 2013 tkd->timekeeper.clock_valid = tkd->shadow_timekeeper.clock_valid = valid; 2014 } 2015 2016 /* 2017 * Flag reflecting whether timekeeping_resume() has injected sleeptime. 2018 * 2019 * The flag starts of false and is only set when a suspend reaches 2020 * timekeeping_suspend(), timekeeping_resume() sets it to false when the 2021 * timekeeper clocksource is not stopping across suspend and has been 2022 * used to update sleep time. If the timekeeper clocksource has stopped 2023 * then the flag stays true and is used by the RTC resume code to decide 2024 * whether sleeptime must be injected and if so the flag gets false then. 2025 * 2026 * If a suspend fails before reaching timekeeping_resume() then the flag 2027 * stays false and prevents erroneous sleeptime injection. 
2028 */ 2029 static bool suspend_timing_needed; 2030 2031 /* Flag for if there is a persistent clock on this platform */ 2032 static bool persistent_clock_exists; 2033 2034 /* 2035 * timekeeping_init - Initializes the clocksource and common timekeeping values 2036 */ 2037 void __init timekeeping_init(void) 2038 { 2039 struct timespec64 wall_time, boot_offset, wall_to_mono; 2040 struct timekeeper *tks = &tk_core.shadow_timekeeper; 2041 struct clocksource *clock; 2042 2043 tkd_basic_setup(&tk_core, TIMEKEEPER_CORE, true); 2044 tk_aux_setup(); 2045 2046 read_persistent_wall_and_boot_offset(&wall_time, &boot_offset); 2047 if (timespec64_valid_settod(&wall_time) && 2048 timespec64_to_ns(&wall_time) > 0) { 2049 persistent_clock_exists = true; 2050 } else if (timespec64_to_ns(&wall_time) != 0) { 2051 pr_warn("Persistent clock returned invalid value"); 2052 wall_time = (struct timespec64){0}; 2053 } 2054 2055 if (timespec64_compare(&wall_time, &boot_offset) < 0) 2056 boot_offset = (struct timespec64){0}; 2057 2058 /* 2059 * We want set wall_to_mono, so the following is true: 2060 * wall time + wall_to_mono = boot time 2061 */ 2062 wall_to_mono = timespec64_sub(boot_offset, wall_time); 2063 2064 guard(raw_spinlock_irqsave)(&tk_core.lock); 2065 2066 ntp_init(); 2067 2068 clock = clocksource_default_clock(); 2069 if (clock->enable) 2070 clock->enable(clock); 2071 tk_setup_internals(tks, clock); 2072 2073 tk_set_xtime(tks, &wall_time); 2074 tks->raw_sec = 0; 2075 2076 tk_set_wall_to_mono(tks, wall_to_mono); 2077 2078 timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET); 2079 } 2080 2081 /* time in seconds when suspend began for persistent clock */ 2082 static struct timespec64 timekeeping_suspend_time; 2083 2084 /** 2085 * __timekeeping_inject_sleeptime - Internal function to add sleep interval 2086 * @tk: Pointer to the timekeeper to be updated 2087 * @delta: Pointer to the delta value in timespec64 format 2088 * 2089 * Takes a timespec offset measuring a suspend 
interval and properly 2090 * adds the sleep offset to the timekeeping variables. 2091 */ 2092 static void __timekeeping_inject_sleeptime(struct timekeeper *tk, 2093 const struct timespec64 *delta) 2094 { 2095 if (!timespec64_valid_strict(delta)) { 2096 printk_deferred(KERN_WARNING 2097 "__timekeeping_inject_sleeptime: Invalid " 2098 "sleep delta value!\n"); 2099 return; 2100 } 2101 tk_xtime_add(tk, delta); 2102 tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta)); 2103 tk_update_sleep_time(tk, timespec64_to_ktime(*delta)); 2104 tk_debug_account_sleep_time(delta); 2105 } 2106 2107 #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) 2108 /* 2109 * We have three kinds of time sources to use for sleep time 2110 * injection, the preference order is: 2111 * 1) non-stop clocksource 2112 * 2) persistent clock (ie: RTC accessible when irqs are off) 2113 * 3) RTC 2114 * 2115 * 1) and 2) are used by timekeeping, 3) by RTC subsystem. 2116 * If system has neither 1) nor 2), 3) will be used finally. 2117 * 2118 * 2119 * If timekeeping has injected sleeptime via either 1) or 2), 2120 * 3) becomes needless, so in this case we don't need to call 2121 * rtc_resume(), and this is what timekeeping_rtc_skipresume() 2122 * means. 2123 */ 2124 bool timekeeping_rtc_skipresume(void) 2125 { 2126 return !suspend_timing_needed; 2127 } 2128 2129 /* 2130 * 1) can be determined whether to use or not only when doing 2131 * timekeeping_resume() which is invoked after rtc_suspend(), 2132 * so we can't skip rtc_suspend() surely if system has 1). 2133 * 2134 * But if system has 2), 2) will definitely be used, so in this 2135 * case we don't need to call rtc_suspend(), and this is what 2136 * timekeeping_rtc_skipsuspend() means. 
*/
bool timekeeping_rtc_skipsuspend(void)
{
	return persistent_clock_exists;
}

/**
 * timekeeping_inject_sleeptime64 - Adds suspend interval to timekeeping values
 * @delta: pointer to a timespec64 delta value
 *
 * This hook is for architectures that cannot support read_persistent_clock64
 * because their RTC/persistent clock is only accessible when irqs are enabled,
 * and also don't have an effective nonstop clocksource.
 *
 * This function should only be called by rtc_resume(), and allows
 * a suspend offset to be injected into the timekeeping values.
 */
void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
{
	scoped_guard(raw_spinlock_irqsave, &tk_core.lock) {
		struct timekeeper *tks = &tk_core.shadow_timekeeper;

		/* Sleep time is being injected now, nothing left pending */
		suspend_timing_needed = false;
		timekeeping_forward_now(tks);
		__timekeeping_inject_sleeptime(tks, delta);
		timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
	}

	/* Signal hrtimers about time change */
	clock_was_set(CLOCK_SET_WALL | CLOCK_SET_BOOT);
}
#endif

/**
 * timekeeping_resume - Resumes the generic timekeeping subsystem.
 *
 * Reads the persistent clock, resumes clockevents and clocksources and
 * then, under the core timekeeper lock, works out how long the system was
 * suspended, injects that interval into the shadow timekeeper, re-bases
 * the cycle_last values, clears the NTP error and the suspended flag and
 * publishes the shadow timekeeper. Afterwards the softlockup watchdog is
 * touched and the tick and timerfd are resumed.
 */
void timekeeping_resume(void)
{
	struct timekeeper *tks = &tk_core.shadow_timekeeper;
	struct clocksource *clock = tks->tkr_mono.clock;
	struct timespec64 ts_new, ts_delta;
	bool inject_sleeptime = false;
	u64 cycle_now, nsec;
	unsigned long flags;

	read_persistent_clock64(&ts_new);

	clockevents_resume();
	clocksource_resume();

	raw_spin_lock_irqsave(&tk_core.lock, flags);

	/*
	 * After system resumes, we need to calculate the suspended time and
	 * compensate it for the OS time. There are 3 sources that could be
	 * used: Nonstop clocksource during suspend, persistent clock and rtc
	 * device.
	 *
	 * One specific platform may have 1 or 2 or all of them, and the
	 * preference will be:
	 *	suspend-nonstop clocksource -> persistent clock -> rtc
	 * The less preferred source will only be tried if there is no better
	 * usable source. The rtc part is handled separately in rtc core code.
	 */
	cycle_now = tk_clock_read(&tks->tkr_mono);
	nsec = clocksource_stop_suspend_timing(clock, cycle_now);
	if (nsec > 0) {
		/* The suspend timing clocksource delivered a sleep interval */
		ts_delta = ns_to_timespec64(nsec);
		inject_sleeptime = true;
	} else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
		/* Fall back to the persistent clock delta across suspend */
		ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time);
		inject_sleeptime = true;
	}

	if (inject_sleeptime) {
		suspend_timing_needed = false;
		__timekeeping_inject_sleeptime(tks, &ts_delta);
	}

	/* Re-base the last cycle value */
	tks->tkr_mono.cycle_last = cycle_now;
	tks->tkr_raw.cycle_last  = cycle_now;

	tks->ntp_error = 0;
	timekeeping_suspended = 0;
	timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET);
	raw_spin_unlock_irqrestore(&tk_core.lock, flags);

	touch_softlockup_watchdog();

	/* Resume the clockevent device(s) and hrtimers */
	tick_resume();
	/* Notify timerfd as resume is equivalent to clock_was_set() */
	timerfd_resume();
}

/* syscore resume callback: forwards to timekeeping_resume(), @data is unused */
static void timekeeping_syscore_resume(void *data)
{
	timekeeping_resume();
}

/*
 * timekeeping_suspend - Suspends the generic timekeeping subsystem.
 *
 * Records the persistent clock value at suspend time, forwards the shadow
 * timekeeper to now, marks timekeeping as suspended, starts suspend timing
 * on the current clocksource, publishes the shadow timekeeper, halts the
 * fast timekeeper and finally suspends tick, clocksources and clockevents.
 *
 * Always returns 0.
 */
int timekeeping_suspend(void)
{
	struct timekeeper *tks = &tk_core.shadow_timekeeper;
	struct timespec64 delta, delta_delta;
	/* Difference between system time and persistent clock, kept across calls */
	static struct timespec64 old_delta;
	struct clocksource *curr_clock;
	unsigned long flags;
	u64 cycle_now;

	read_persistent_clock64(&timekeeping_suspend_time);

	/*
	 * On some systems the persistent_clock can not be detected at
	 * timekeeping_init by its return value, so if we see a valid
	 * value returned, update the persistent_clock_exists flag.
	 */
	if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
		persistent_clock_exists = true;

	suspend_timing_needed = true;

	raw_spin_lock_irqsave(&tk_core.lock, flags);
	timekeeping_forward_now(tks);
	timekeeping_suspended = 1;

	/*
	 * Since we've called forward_now, cycle_last stores the value
	 * just read from the current clocksource. Save this to potentially
	 * use in suspend timing.
	 */
	curr_clock = tks->tkr_mono.clock;
	cycle_now = tks->tkr_mono.cycle_last;
	clocksource_start_suspend_timing(curr_clock, cycle_now);

	if (persistent_clock_exists) {
		/*
		 * To avoid drift caused by repeated suspend/resumes,
		 * which each can add ~1 second drift error,
		 * try to compensate so the difference in system time
		 * and persistent_clock time stays close to constant.
		 */
		delta = timespec64_sub(tk_xtime(tks), timekeeping_suspend_time);
		delta_delta = timespec64_sub(delta, old_delta);
		if (abs(delta_delta.tv_sec) >= 2) {
			/*
			 * if delta_delta is too large, assume time correction
			 * has occurred and set old_delta to the current delta.
			 */
			old_delta = delta;
		} else {
			/* Otherwise adjust the recorded suspend time to compensate */
			timekeeping_suspend_time =
				timespec64_add(timekeeping_suspend_time, delta_delta);
		}
	}

	/* Publish the forwarded shadow timekeeper without any action flags */
	timekeeping_update_from_shadow(&tk_core, 0);
	halt_fast_timekeeper(tks);
	raw_spin_unlock_irqrestore(&tk_core.lock, flags);

	tick_suspend();
	clocksource_suspend();
	clockevents_suspend();

	return 0;
}

/* syscore suspend callback: forwards to timekeeping_suspend(), @data is unused */
static int timekeeping_syscore_suspend(void *data)
{
	return timekeeping_suspend();
}

/* syscore resume/suspend bits for timekeeping */
static const struct syscore_ops timekeeping_syscore_ops = {
	.resume		= timekeeping_syscore_resume,
	.suspend	= timekeeping_syscore_suspend,
};

static struct syscore timekeeping_syscore = {
	.ops = &timekeeping_syscore_ops,
};

/* Register the timekeeping syscore callbacks at device_initcall time */
static int __init timekeeping_init_ops(void)
{
	register_syscore(&timekeeping_syscore);
	return 0;
}
device_initcall(timekeeping_init_ops);

/*
 * Apply a multiplier adjustment to the timekeeper
 *
 * @tk:		timekeeper to adjust
 * @offset:	non-accumulated cycles, see the derivation below
 * @mult_adj:	signed change to apply to tkr_mono.mult; 0 is a no-op
 */
static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
							 s64 offset,
							 s32 mult_adj)
{
	s64 interval = tk->cycle_interval;

	/* Scale interval and offset by mult_adj; +/-1 avoid the multiply */
	if (mult_adj == 0) {
		return;
	} else if (mult_adj == -1) {
		interval = -interval;
		offset = -offset;
	} else if (mult_adj != 1) {
		interval *= mult_adj;
		offset *= mult_adj;
	}

	/*
	 * So the following can be confusing.
	 *
	 * To keep things simple, let's assume mult_adj == 1 for now.
	 *
	 * When mult_adj != 1, remember that the interval and offset values
	 * have been appropriately scaled so the math is the same.
	 *
	 * The basic idea here is that we're increasing the multiplier
	 * by one, this causes the xtime_interval to be incremented by
	 * one cycle_interval. This is because:
	 *	xtime_interval = cycle_interval * mult
	 * So if mult is being incremented by one:
	 *	xtime_interval = cycle_interval * (mult + 1)
	 * It's the same as:
	 *	xtime_interval = (cycle_interval * mult) + cycle_interval
	 * Which can be shortened to:
	 *	xtime_interval += cycle_interval
	 *
	 * So offset stores the non-accumulated cycles. Thus the current
	 * time (in shifted nanoseconds) is:
	 *	now = (offset * adj) + xtime_nsec
	 * Now, even though we're adjusting the clock frequency, we have
	 * to keep time consistent. In other words, we can't jump back
	 * in time, and we also want to avoid jumping forward in time.
	 *
	 * So given the same offset value, we need the time to be the same
	 * both before and after the freq adjustment.
	 *	now = (offset * adj_1) + xtime_nsec_1
	 *	now = (offset * adj_2) + xtime_nsec_2
	 * So:
	 *	(offset * adj_1) + xtime_nsec_1 =
	 *		(offset * adj_2) + xtime_nsec_2
	 * And we know:
	 *	adj_2 = adj_1 + 1
	 * So:
	 *	(offset * adj_1) + xtime_nsec_1 =
	 *		(offset * (adj_1+1)) + xtime_nsec_2
	 *	(offset * adj_1) + xtime_nsec_1 =
	 *		(offset * adj_1) + offset + xtime_nsec_2
	 * Canceling the sides:
	 *	xtime_nsec_1 = offset + xtime_nsec_2
	 * Which gives us:
	 *	xtime_nsec_2 = xtime_nsec_1 - offset
	 * Which simplifies to:
	 *	xtime_nsec -= offset
	 */
	if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
		/* NTP adjustment caused clocksource mult overflow */
		WARN_ON_ONCE(1);
		return;
	}

	tk->tkr_mono.mult += mult_adj;
	tk->xtime_interval += interval;
	tk->tkr_mono.xtime_nsec -= offset;
}

/*
 * Adjust the timekeeper's multiplier to the correct frequency
 * and also to reduce the accumulated error value.
*/
static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
{
	u64 ntp_tl = ntp_tick_length(tk->id);
	u32 mult;

	/*
	 * Determine the multiplier from the current NTP tick length.
	 * Avoid expensive division when the tick length doesn't change.
	 */
	if (likely(tk->ntp_tick == ntp_tl)) {
		/* Strip the error correction applied by the previous call */
		mult = tk->tkr_mono.mult - tk->ntp_err_mult;
	} else {
		tk->ntp_tick = ntp_tl;
		mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) -
				 tk->xtime_remainder, tk->cycle_interval);
	}

	/*
	 * If the clock is behind the NTP time, increase the multiplier by 1
	 * to catch up with it. If it's ahead and there was a remainder in the
	 * tick division, the clock will slow down. Otherwise it will stay
	 * ahead until the tick length changes to a non-divisible value.
	 */
	tk->ntp_err_mult = tk->ntp_error > 0 ? 1 : 0;
	mult += tk->ntp_err_mult;

	/* Apply only the difference to the currently used multiplier */
	timekeeping_apply_adjustment(tk, offset, mult - tk->tkr_mono.mult);

	/* Warn once when the multiplier deviates beyond the clocksource's maxadj */
	if (unlikely(tk->tkr_mono.clock->maxadj &&
		(abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
			> tk->tkr_mono.clock->maxadj))) {
		printk_once(KERN_WARNING
			"Adjusting %s more than 11%% (%ld vs %ld)\n",
			tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
			(long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
	}

	/*
	 * It may be possible that when we entered this function, xtime_nsec
	 * was very small.  Further, if we're slightly speeding the clocksource
	 * in the code above, it's possible the required corrective factor to
	 * xtime_nsec could cause it to underflow.
	 *
	 * Now, since we have already accumulated the second and the NTP
	 * subsystem has been notified via second_overflow(), we need to skip
	 * the next update.
	 */
	if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
		tk->tkr_mono.xtime_nsec += (u64)NSEC_PER_SEC <<
							tk->tkr_mono.shift;
		tk->xtime_sec--;
		tk->skip_second_overflow = 1;
	}
}

/*
 * accumulate_nsecs_to_secs - Accumulates nsecs into secs
 *
 * Helper function that accumulates the nsecs greater than a second
 * from the xtime_nsec field to the xtime_sec field.
 * It also calls into the NTP code to handle leapsecond processing.
 *
 * Returns TK_CLOCK_WAS_SET when a leap second was applied, 0 otherwise.
 */
static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
{
	/* One second expressed in shifted nanoseconds */
	u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
	unsigned int clock_set = 0;

	while (tk->tkr_mono.xtime_nsec >= nsecps) {
		int leap;

		tk->tkr_mono.xtime_nsec -= nsecps;
		tk->xtime_sec++;

		/*
		 * Skip NTP update if this second was accumulated before,
		 * i.e. xtime_nsec underflowed in timekeeping_adjust()
		 */
		if (unlikely(tk->skip_second_overflow)) {
			tk->skip_second_overflow = 0;
			continue;
		}

		/* Figure out if it's a leap sec and apply if needed */
		leap = second_overflow(tk->id, tk->xtime_sec);
		if (unlikely(leap)) {
			struct timespec64 ts;

			tk->xtime_sec += leap;

			/* Shift wall_to_monotonic by the leap amount as well */
			ts.tv_sec = leap;
			ts.tv_nsec = 0;
			tk_set_wall_to_mono(tk,
				timespec64_sub(tk->wall_to_monotonic, ts));

			__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);

			clock_set = TK_CLOCK_WAS_SET;
		}
	}
	return clock_set;
}

/*
 * logarithmic_accumulation - shifted accumulation of cycles
 *
 * This function accumulates a shifted interval of cycles into
 * a shifted interval nanoseconds. Allows for O(log) accumulation
 * loop.
 *
 * Returns the unconsumed cycles.
*/
static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
				    u32 shift, unsigned int *clock_set)
{
	/* Number of cycles consumed by one accumulation step at this shift */
	u64 interval = tk->cycle_interval << shift;
	u64 snsec_per_sec;

	/* If the offset is smaller than a shifted interval, do nothing */
	if (offset < interval)
		return offset;

	/* Accumulate one shifted interval */
	offset -= interval;
	tk->tkr_mono.cycle_last += interval;
	tk->tkr_raw.cycle_last  += interval;

	tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
	*clock_set |= accumulate_nsecs_to_secs(tk);

	/* Accumulate raw time */
	tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
	snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
	while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
		tk->tkr_raw.xtime_nsec -= snsec_per_sec;
		tk->raw_sec++;
	}

	/* Accumulate error between NTP and clock interval */
	tk->ntp_error += tk->ntp_tick << shift;
	tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
						(tk->ntp_error_shift + shift);

	return offset;
}

/*
 * __timekeeping_advance - Updates the timekeeper to the current time and
 * current NTP tick length
 *
 * @tkd:	timekeeper data whose shadow timekeeper is advanced
 * @mode:	TK_ADV_TICK bails out when less than one cycle_interval has
 *		elapsed, any other mode always runs the update
 *
 * Returns true when the accumulation flagged a clock-was-set condition,
 * false otherwise, including when timekeeping is suspended or there was
 * nothing to do.
 */
static bool __timekeeping_advance(struct tk_data *tkd, enum timekeeping_adv_mode mode)
{
	struct timekeeper *tk = &tkd->shadow_timekeeper;
	struct timekeeper *real_tk = &tkd->timekeeper;
	unsigned int clock_set = 0;
	int shift = 0, maxshift;
	u64 offset, orig_offset;

	/* Make sure we're fully resumed: */
	if (unlikely(timekeeping_suspended))
		return false;

	offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
				   tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
				   tk->tkr_mono.clock->max_raw_delta);
	/* Remembered to detect below whether any cycles were accumulated */
	orig_offset = offset;
	/* Check if there's really nothing to do */
	if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
		return false;

	/*
	 * With NO_HZ we may have to accumulate many cycle_intervals
	 * (think "ticks") worth of time at once. To do this efficiently,
	 * we calculate the largest doubling multiple of cycle_intervals
	 * that is smaller than the offset.  We then accumulate that
	 * chunk in one go, and then try to consume the next smaller
	 * doubled multiple.
	 */
	shift = ilog2(offset) - ilog2(tk->cycle_interval);
	shift = max(0, shift);
	/* Bound shift to one less than what overflows tick_length */
	maxshift = (64 - (ilog2(ntp_tick_length(tk->id)) + 1)) - 1;
	shift = min(shift, maxshift);
	while (offset >= tk->cycle_interval) {
		offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
		if (offset < tk->cycle_interval<<shift)
			shift--;
	}

	/* Adjust the multiplier to correct NTP error */
	timekeeping_adjust(tk, offset);

	/*
	 * Finally, make sure that after the rounding
	 * xtime_nsec isn't larger than NSEC_PER_SEC
	 */
	clock_set |= accumulate_nsecs_to_secs(tk);

	/*
	 * To avoid inconsistencies caused by adjtimex TK_ADV_FREQ calls
	 * making small negative adjustments to the base xtime_nsec
	 * value, only update the coarse clocks if we accumulated time
	 */
	if (orig_offset != offset)
		tk_update_coarse_nsecs(tk);

	timekeeping_update_from_shadow(tkd, clock_set);

	return !!clock_set;
}

/* Advance the core timekeeper while holding its lock with interrupts disabled */
static bool timekeeping_advance(enum timekeeping_adv_mode mode)
{
	guard(raw_spinlock_irqsave)(&tk_core.lock);
	return __timekeeping_advance(&tk_core, mode);
}

/**
 * update_wall_time - Uses the current clocksource to increment the wall time
 *
 * It also updates the enabled auxiliary clock timekeepers
 */
void update_wall_time(void)
{
	/* A clock-was-set condition from the core advance is signalled deferred */
	if (timekeeping_advance(TK_ADV_TICK))
		clock_was_set_delayed();
	tk_aux_advance();
}

/**
 * getboottime64 - Return the real time
of system boot. 2638 * @ts: pointer to the timespec64 to be set 2639 * 2640 * Returns the wall-time of boot in a timespec64. 2641 * 2642 * This is based on the wall_to_monotonic offset and the total suspend 2643 * time. Calls to settimeofday will affect the value returned (which 2644 * basically means that however wrong your real time clock is at boot time, 2645 * you get the right time here). 2646 */ 2647 void getboottime64(struct timespec64 *ts) 2648 { 2649 struct timekeeper *tk = &tk_core.timekeeper; 2650 ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot); 2651 2652 *ts = ktime_to_timespec64(t); 2653 } 2654 EXPORT_SYMBOL_GPL(getboottime64); 2655 2656 void ktime_get_coarse_real_ts64(struct timespec64 *ts) 2657 { 2658 struct timekeeper *tk = &tk_core.timekeeper; 2659 unsigned int seq; 2660 2661 do { 2662 seq = read_seqcount_begin(&tk_core.seq); 2663 2664 *ts = tk_xtime_coarse(tk); 2665 } while (read_seqcount_retry(&tk_core.seq, seq)); 2666 } 2667 EXPORT_SYMBOL(ktime_get_coarse_real_ts64); 2668 2669 /** 2670 * ktime_get_coarse_real_ts64_mg - return latter of coarse grained time or floor 2671 * @ts: timespec64 to be filled 2672 * 2673 * Fetch the global mg_floor value, convert it to realtime and compare it 2674 * to the current coarse-grained time. Fill @ts with whichever is 2675 * latest. Note that this is a filesystem-specific interface and should be 2676 * avoided outside of that context. 
2677 */ 2678 void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts) 2679 { 2680 struct timekeeper *tk = &tk_core.timekeeper; 2681 u64 floor = atomic64_read(&mg_floor); 2682 ktime_t f_real, offset, coarse; 2683 unsigned int seq; 2684 2685 do { 2686 seq = read_seqcount_begin(&tk_core.seq); 2687 *ts = tk_xtime_coarse(tk); 2688 offset = tk_core.timekeeper.offs_real; 2689 } while (read_seqcount_retry(&tk_core.seq, seq)); 2690 2691 coarse = timespec64_to_ktime(*ts); 2692 f_real = ktime_add(floor, offset); 2693 if (ktime_after(f_real, coarse)) 2694 *ts = ktime_to_timespec64(f_real); 2695 } 2696 2697 /** 2698 * ktime_get_real_ts64_mg - attempt to update floor value and return result 2699 * @ts: pointer to the timespec to be set 2700 * 2701 * Get a monotonic fine-grained time value and attempt to swap it into 2702 * mg_floor. If that succeeds then accept the new floor value. If it fails 2703 * then another task raced in during the interim time and updated the 2704 * floor. Since any update to the floor must be later than the previous 2705 * floor, either outcome is acceptable. 2706 * 2707 * Typically this will be called after calling ktime_get_coarse_real_ts64_mg(), 2708 * and determining that the resulting coarse-grained timestamp did not effect 2709 * a change in ctime. Any more recent floor value would effect a change to 2710 * ctime, so there is no need to retry the atomic64_try_cmpxchg() on failure. 2711 * 2712 * @ts will be filled with the latest floor value, regardless of the outcome of 2713 * the cmpxchg. Note that this is a filesystem specific interface and should be 2714 * avoided outside of that context. 
2715 */ 2716 void ktime_get_real_ts64_mg(struct timespec64 *ts) 2717 { 2718 struct timekeeper *tk = &tk_core.timekeeper; 2719 ktime_t old = atomic64_read(&mg_floor); 2720 ktime_t offset, mono; 2721 unsigned int seq; 2722 u64 nsecs; 2723 2724 do { 2725 seq = read_seqcount_begin(&tk_core.seq); 2726 2727 ts->tv_sec = tk->xtime_sec; 2728 mono = tk->tkr_mono.base; 2729 nsecs = timekeeping_get_ns(&tk->tkr_mono); 2730 offset = tk_core.timekeeper.offs_real; 2731 } while (read_seqcount_retry(&tk_core.seq, seq)); 2732 2733 mono = ktime_add_ns(mono, nsecs); 2734 2735 /* 2736 * Attempt to update the floor with the new time value. As any 2737 * update must be later then the existing floor, and would effect 2738 * a change to ctime from the perspective of the current task, 2739 * accept the resulting floor value regardless of the outcome of 2740 * the swap. 2741 */ 2742 if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) { 2743 ts->tv_nsec = 0; 2744 timespec64_add_ns(ts, nsecs); 2745 timekeeping_inc_mg_floor_swaps(); 2746 } else { 2747 /* 2748 * Another task changed mg_floor since "old" was fetched. 2749 * "old" has been updated with the latest value of "mg_floor". 2750 * That value is newer than the previous floor value, which 2751 * is enough to effect a change to ctime. Accept it. 
2752 */ 2753 *ts = ktime_to_timespec64(ktime_add(old, offset)); 2754 } 2755 } 2756 2757 void ktime_get_coarse_ts64(struct timespec64 *ts) 2758 { 2759 struct timekeeper *tk = &tk_core.timekeeper; 2760 struct timespec64 now, mono; 2761 unsigned int seq; 2762 2763 do { 2764 seq = read_seqcount_begin(&tk_core.seq); 2765 2766 now = tk_xtime_coarse(tk); 2767 mono = tk->wall_to_monotonic; 2768 } while (read_seqcount_retry(&tk_core.seq, seq)); 2769 2770 set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec, 2771 now.tv_nsec + mono.tv_nsec); 2772 } 2773 EXPORT_SYMBOL(ktime_get_coarse_ts64); 2774 2775 /* 2776 * Must hold jiffies_lock 2777 */ 2778 void do_timer(unsigned long ticks) 2779 { 2780 jiffies_64 += ticks; 2781 calc_global_load(); 2782 } 2783 2784 /** 2785 * ktime_get_update_offsets_now - hrtimer helper 2786 * @cwsseq: pointer to check and store the clock was set sequence number 2787 * @offs_real: pointer to storage for monotonic -> realtime offset 2788 * @offs_boot: pointer to storage for monotonic -> boottime offset 2789 * @offs_tai: pointer to storage for monotonic -> clock tai offset 2790 * 2791 * Returns current monotonic time and updates the offsets if the 2792 * sequence number in @cwsseq and timekeeper.clock_was_set_seq are 2793 * different. 
2794 * 2795 * Called from hrtimer_interrupt() or retrigger_next_event() 2796 */ 2797 ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, 2798 ktime_t *offs_boot, ktime_t *offs_tai) 2799 { 2800 struct timekeeper *tk = &tk_core.timekeeper; 2801 unsigned int seq; 2802 ktime_t base; 2803 u64 nsecs; 2804 2805 do { 2806 seq = read_seqcount_begin(&tk_core.seq); 2807 2808 base = tk->tkr_mono.base; 2809 nsecs = timekeeping_get_ns(&tk->tkr_mono); 2810 base = ktime_add_ns(base, nsecs); 2811 2812 if (*cwsseq != tk->clock_was_set_seq) { 2813 *cwsseq = tk->clock_was_set_seq; 2814 *offs_real = tk->offs_real; 2815 *offs_boot = tk->offs_boot; 2816 *offs_tai = tk->offs_tai; 2817 } 2818 2819 /* Handle leapsecond insertion adjustments */ 2820 if (unlikely(base >= tk->next_leap_ktime)) 2821 *offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0)); 2822 2823 } while (read_seqcount_retry(&tk_core.seq, seq)); 2824 2825 return base; 2826 } 2827 2828 /* 2829 * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex 2830 */ 2831 static int timekeeping_validate_timex(const struct __kernel_timex *txc, bool aux_clock) 2832 { 2833 if (txc->modes & ADJ_ADJTIME) { 2834 /* singleshot must not be used with any other mode bits */ 2835 if (!(txc->modes & ADJ_OFFSET_SINGLESHOT)) 2836 return -EINVAL; 2837 if (!(txc->modes & ADJ_OFFSET_READONLY) && 2838 !capable(CAP_SYS_TIME)) 2839 return -EPERM; 2840 } else { 2841 /* In order to modify anything, you gotta be super-user! */ 2842 if (txc->modes && !capable(CAP_SYS_TIME)) 2843 return -EPERM; 2844 /* 2845 * if the quartz is off by more than 10% then 2846 * something is VERY wrong! 2847 */ 2848 if (txc->modes & ADJ_TICK && 2849 (txc->tick < 900000/USER_HZ || 2850 txc->tick > 1100000/USER_HZ)) 2851 return -EINVAL; 2852 } 2853 2854 if (txc->modes & ADJ_SETOFFSET) { 2855 /* In order to inject time, you gotta be super-user! 
*/ 2856 if (!capable(CAP_SYS_TIME)) 2857 return -EPERM; 2858 2859 /* 2860 * Validate if a timespec/timeval used to inject a time 2861 * offset is valid. Offsets can be positive or negative, so 2862 * we don't check tv_sec. The value of the timeval/timespec 2863 * is the sum of its fields,but *NOTE*: 2864 * The field tv_usec/tv_nsec must always be non-negative and 2865 * we can't have more nanoseconds/microseconds than a second. 2866 */ 2867 if (txc->time.tv_usec < 0) 2868 return -EINVAL; 2869 2870 if (txc->modes & ADJ_NANO) { 2871 if (txc->time.tv_usec >= NSEC_PER_SEC) 2872 return -EINVAL; 2873 } else { 2874 if (txc->time.tv_usec >= USEC_PER_SEC) 2875 return -EINVAL; 2876 } 2877 } 2878 2879 /* 2880 * Check for potential multiplication overflows that can 2881 * only happen on 64-bit systems: 2882 */ 2883 if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) { 2884 if (LLONG_MIN / PPM_SCALE > txc->freq) 2885 return -EINVAL; 2886 if (LLONG_MAX / PPM_SCALE < txc->freq) 2887 return -EINVAL; 2888 } 2889 2890 if (aux_clock) { 2891 /* Auxiliary clocks are similar to TAI and do not have leap seconds */ 2892 if (txc->modes & ADJ_STATUS && 2893 txc->status & (STA_INS | STA_DEL)) 2894 return -EINVAL; 2895 2896 /* No TAI offset setting */ 2897 if (txc->modes & ADJ_TAI) 2898 return -EINVAL; 2899 2900 /* No PPS support either */ 2901 if (txc->modes & ADJ_STATUS && 2902 txc->status & (STA_PPSFREQ | STA_PPSTIME)) 2903 return -EINVAL; 2904 } 2905 2906 return 0; 2907 } 2908 2909 /** 2910 * random_get_entropy_fallback - Returns the raw clock source value, 2911 * used by random.c for platforms with no valid random_get_entropy(). 
*/
unsigned long random_get_entropy_fallback(void)
{
	struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;
	struct clocksource *clock = READ_ONCE(tkr->clock);

	/* Nothing to read while suspended or before a clocksource is installed */
	if (unlikely(timekeeping_suspended || !clock))
		return 0;
	return clock->read(clock);
}
EXPORT_SYMBOL_GPL(random_get_entropy_fallback);

/*
 * Result of __do_adjtimex() handed back to its caller:
 * @ad:		audit data filled in by ntp_adjtimex()
 * @delta:	offset injected when ADJ_SETOFFSET was requested
 * @clock_set:	true when the clock was set by the operation
 */
struct adjtimex_result {
	struct audit_ntp_data	ad;
	struct timespec64	delta;
	bool			clock_set;
};

/*
 * __do_adjtimex - Validate and apply an adjtimex request to one timekeeper
 * @tkd:	timekeeper data to operate on
 * @txc:	the request; also handed to ntp_adjtimex()
 * @result:	filled with audit data, the injected delta and the clock_set
 *		indication
 *
 * Returns the error from timekeeping_validate_timex(), -ENODEV when the
 * timekeeper's clock is not valid, the error from
 * __timekeeping_inject_offset(), or otherwise the return value of
 * ntp_adjtimex().
 */
static int __do_adjtimex(struct tk_data *tkd, struct __kernel_timex *txc,
			 struct adjtimex_result *result)
{
	struct timekeeper *tks = &tkd->shadow_timekeeper;
	bool aux_clock = !timekeeper_is_core_tk(tks);
	struct timespec64 ts;
	s32 orig_tai, tai;
	int ret;

	/* Validate the data before disabling interrupts */
	ret = timekeeping_validate_timex(txc, aux_clock);
	if (ret)
		return ret;
	add_device_randomness(txc, sizeof(*txc));

	/* Current time of the affected clock, passed to ntp_adjtimex() below */
	if (!aux_clock)
		ktime_get_real_ts64(&ts);
	else
		tk_get_aux_ts64(tkd->timekeeper.id, &ts);

	add_device_randomness(&ts, sizeof(ts));

	guard(raw_spinlock_irqsave)(&tkd->lock);

	if (!tks->clock_valid)
		return -ENODEV;

	if (txc->modes & ADJ_SETOFFSET) {
		result->delta.tv_sec  = txc->time.tv_sec;
		result->delta.tv_nsec = txc->time.tv_usec;
		/* Without ADJ_NANO the sub-second field is in microseconds */
		if (!(txc->modes & ADJ_NANO))
			result->delta.tv_nsec *= 1000;
		ret = __timekeeping_inject_offset(tkd, &result->delta);
		if (ret)
			return ret;
		result->clock_set = true;
	}

	orig_tai = tai = tks->tai_offset;
	ret = ntp_adjtimex(tks->id, txc, &ts, &tai, &result->ad);

	/* Publish a changed TAI offset, otherwise only refresh the leap state */
	if (tai != orig_tai) {
		__timekeeping_set_tai_offset(tks, tai);
		timekeeping_update_from_shadow(tkd, TK_CLOCK_WAS_SET);
		result->clock_set = true;
	} else {
		tk_update_leap_state_all(tkd);
	}

	/* Update the multiplier immediately if frequency was set directly */
	if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK))
		result->clock_set |= __timekeeping_advance(tkd, TK_ADV_FREQ);

	return ret;
}

/**
 * do_adjtimex() - Accessor function to NTP __do_adjtimex function
 * @txc:	Pointer to kernel_timex structure containing NTP parameters
 *
 * Operates on the core timekeeper. On success the injected offset and the
 * NTP data are passed to audit, clock_was_set() is invoked when the clock
 * was set and the CMOS timer is notified.
 *
 * Return: Negative error code from __do_adjtimex() on failure, otherwise
 * its non-negative return value.
 */
int do_adjtimex(struct __kernel_timex *txc)
{
	struct adjtimex_result result = { };
	int ret;

	ret = __do_adjtimex(&tk_core, txc, &result);
	if (ret < 0)
		return ret;

	if (txc->modes & ADJ_SETOFFSET)
		audit_tk_injoffset(result.delta);

	audit_ntp_log(&result.ad);

	if (result.clock_set)
		clock_was_set(CLOCK_SET_WALL);

	ntp_notify_cmos_timer(result.delta.tv_sec != 0);

	return ret;
}

/*
 * Invoked from NTP with the time keeper lock held, so lockless access is
 * fine.
 *
 * Returns xtime_sec of the timekeeper with index @id.
 */
long ktime_get_ntp_seconds(unsigned int id)
{
	return timekeeper_data[id].timekeeper.xtime_sec;
}

#ifdef CONFIG_NTP_PPS
/**
 * hardpps() - Accessor function to NTP __hardpps function
 * @phase_ts:	Pointer to timespec64 structure representing phase timestamp
 * @raw_ts:	Pointer to timespec64 structure representing raw timestamp
 *
 * Calls __hardpps() with the core timekeeper lock held.
 */
void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
{
	guard(raw_spinlock_irqsave)(&tk_core.lock);
	__hardpps(phase_ts, raw_ts);
}
EXPORT_SYMBOL(hardpps);
#endif /* CONFIG_NTP_PPS */

#ifdef CONFIG_POSIX_AUX_CLOCKS
#include "posix-timers.h"

/*
 * Bitmap for the activated auxiliary timekeepers to allow lockless quick
 * checks in the hot paths without touching extra cache lines. If set, then
 * the state of the corresponding timekeeper has to be re-checked under
 * timekeeper::lock.
3043 */ 3044 static unsigned long aux_timekeepers; 3045 3046 static inline unsigned int clockid_to_tkid(unsigned int id) 3047 { 3048 return TIMEKEEPER_AUX_FIRST + id - CLOCK_AUX; 3049 } 3050 3051 static inline struct tk_data *aux_get_tk_data(clockid_t id) 3052 { 3053 if (!clockid_aux_valid(id)) 3054 return NULL; 3055 return &timekeeper_data[clockid_to_tkid(id)]; 3056 } 3057 3058 /* Invoked from timekeeping after a clocksource change */ 3059 static void tk_aux_update_clocksource(void) 3060 { 3061 unsigned long active = READ_ONCE(aux_timekeepers); 3062 unsigned int id; 3063 3064 for_each_set_bit(id, &active, BITS_PER_LONG) { 3065 struct tk_data *tkd = &timekeeper_data[id + TIMEKEEPER_AUX_FIRST]; 3066 struct timekeeper *tks = &tkd->shadow_timekeeper; 3067 3068 guard(raw_spinlock_irqsave)(&tkd->lock); 3069 if (!tks->clock_valid) 3070 continue; 3071 3072 timekeeping_forward_now(tks); 3073 tk_setup_internals(tks, tk_core.timekeeper.tkr_raw.clock); 3074 timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL); 3075 } 3076 } 3077 3078 static void tk_aux_advance(void) 3079 { 3080 unsigned long active = READ_ONCE(aux_timekeepers); 3081 unsigned int id; 3082 3083 /* Lockless quick check to avoid extra cache lines */ 3084 for_each_set_bit(id, &active, BITS_PER_LONG) { 3085 struct tk_data *aux_tkd = &timekeeper_data[id + TIMEKEEPER_AUX_FIRST]; 3086 3087 guard(raw_spinlock)(&aux_tkd->lock); 3088 if (aux_tkd->shadow_timekeeper.clock_valid) 3089 __timekeeping_advance(aux_tkd, TK_ADV_TICK); 3090 } 3091 } 3092 3093 /** 3094 * ktime_get_aux - Get time for a AUX clock 3095 * @id: ID of the clock to read (CLOCK_AUX...) 
3096 * @kt: Pointer to ktime_t to store the time stamp 3097 * 3098 * Returns: True if the timestamp is valid, false otherwise 3099 */ 3100 bool ktime_get_aux(clockid_t id, ktime_t *kt) 3101 { 3102 struct tk_data *aux_tkd = aux_get_tk_data(id); 3103 struct timekeeper *aux_tk; 3104 unsigned int seq; 3105 ktime_t base; 3106 u64 nsecs; 3107 3108 WARN_ON(timekeeping_suspended); 3109 3110 if (!aux_tkd) 3111 return false; 3112 3113 aux_tk = &aux_tkd->timekeeper; 3114 do { 3115 seq = read_seqcount_begin(&aux_tkd->seq); 3116 if (!aux_tk->clock_valid) 3117 return false; 3118 3119 base = ktime_add(aux_tk->tkr_mono.base, aux_tk->offs_aux); 3120 nsecs = timekeeping_get_ns(&aux_tk->tkr_mono); 3121 } while (read_seqcount_retry(&aux_tkd->seq, seq)); 3122 3123 *kt = ktime_add_ns(base, nsecs); 3124 return true; 3125 } 3126 EXPORT_SYMBOL_GPL(ktime_get_aux); 3127 3128 /** 3129 * ktime_get_aux_ts64 - Get time for a AUX clock 3130 * @id: ID of the clock to read (CLOCK_AUX...) 3131 * @ts: Pointer to timespec64 to store the time stamp 3132 * 3133 * Returns: True if the timestamp is valid, false otherwise 3134 */ 3135 bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *ts) 3136 { 3137 ktime_t now; 3138 3139 if (!ktime_get_aux(id, &now)) 3140 return false; 3141 *ts = ktime_to_timespec64(now); 3142 return true; 3143 } 3144 EXPORT_SYMBOL_GPL(ktime_get_aux_ts64); 3145 3146 static int aux_get_res(clockid_t id, struct timespec64 *tp) 3147 { 3148 if (!clockid_aux_valid(id)) 3149 return -ENODEV; 3150 3151 tp->tv_sec = aux_clock_resolution_ns() / NSEC_PER_SEC; 3152 tp->tv_nsec = aux_clock_resolution_ns() % NSEC_PER_SEC; 3153 return 0; 3154 } 3155 3156 static int aux_get_timespec(clockid_t id, struct timespec64 *tp) 3157 { 3158 return ktime_get_aux_ts64(id, tp) ? 
0 : -ENODEV; 3159 } 3160 3161 static int aux_clock_set(const clockid_t id, const struct timespec64 *tnew) 3162 { 3163 struct tk_data *aux_tkd = aux_get_tk_data(id); 3164 struct timekeeper *aux_tks; 3165 ktime_t tnow, nsecs; 3166 3167 if (!timespec64_valid_settod(tnew)) 3168 return -EINVAL; 3169 if (!aux_tkd) 3170 return -ENODEV; 3171 3172 aux_tks = &aux_tkd->shadow_timekeeper; 3173 3174 guard(raw_spinlock_irq)(&aux_tkd->lock); 3175 if (!aux_tks->clock_valid) 3176 return -ENODEV; 3177 3178 /* Forward the timekeeper base time */ 3179 timekeeping_forward_now(aux_tks); 3180 /* 3181 * Get the updated base time. tkr_mono.base has not been 3182 * updated yet, so do that first. That makes the update 3183 * in timekeeping_update_from_shadow() redundant, but 3184 * that's harmless. After that @tnow can be calculated 3185 * by using tkr_mono::cycle_last, which has been set 3186 * by timekeeping_forward_now(). 3187 */ 3188 tk_update_ktime_data(aux_tks); 3189 nsecs = timekeeping_cycles_to_ns(&aux_tks->tkr_mono, aux_tks->tkr_mono.cycle_last); 3190 tnow = ktime_add(aux_tks->tkr_mono.base, nsecs); 3191 3192 /* 3193 * Calculate the new AUX offset as delta to @tnow ("monotonic"). 3194 * That avoids all the tk::xtime back and forth conversions as 3195 * xtime ("realtime") is not applicable for auxiliary clocks and 3196 * kept in sync with "monotonic". 3197 */ 3198 tk_update_aux_offs(aux_tks, ktime_sub(timespec64_to_ktime(*tnew), tnow)); 3199 3200 timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); 3201 return 0; 3202 } 3203 3204 static int aux_clock_adj(const clockid_t id, struct __kernel_timex *txc) 3205 { 3206 struct tk_data *aux_tkd = aux_get_tk_data(id); 3207 struct adjtimex_result result = { }; 3208 3209 if (!aux_tkd) 3210 return -ENODEV; 3211 3212 /* 3213 * @result is ignored for now as there are neither hrtimers nor a 3214 * RTC related to auxiliary clocks for now. 
3215 */ 3216 return __do_adjtimex(aux_tkd, txc, &result); 3217 } 3218 3219 const struct k_clock clock_aux = { 3220 .clock_getres = aux_get_res, 3221 .clock_get_timespec = aux_get_timespec, 3222 .clock_set = aux_clock_set, 3223 .clock_adj = aux_clock_adj, 3224 }; 3225 3226 static void aux_clock_enable(clockid_t id) 3227 { 3228 struct tk_read_base *tkr_raw = &tk_core.timekeeper.tkr_raw; 3229 struct tk_data *aux_tkd = aux_get_tk_data(id); 3230 struct timekeeper *aux_tks = &aux_tkd->shadow_timekeeper; 3231 3232 /* Prevent the core timekeeper from changing. */ 3233 guard(raw_spinlock_irq)(&tk_core.lock); 3234 3235 /* 3236 * Setup the auxiliary clock assuming that the raw core timekeeper 3237 * clock frequency conversion is close enough. Userspace has to 3238 * adjust for the deviation via clock_adjtime(2). 3239 */ 3240 guard(raw_spinlock_nested)(&aux_tkd->lock); 3241 3242 /* Remove leftovers of a previous registration */ 3243 memset(aux_tks, 0, sizeof(*aux_tks)); 3244 /* Restore the timekeeper id */ 3245 aux_tks->id = aux_tkd->timekeeper.id; 3246 /* Setup the timekeeper based on the current system clocksource */ 3247 tk_setup_internals(aux_tks, tkr_raw->clock); 3248 3249 /* Mark it valid and set it live */ 3250 aux_tks->clock_valid = true; 3251 timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); 3252 } 3253 3254 static void aux_clock_disable(clockid_t id) 3255 { 3256 struct tk_data *aux_tkd = aux_get_tk_data(id); 3257 3258 guard(raw_spinlock_irq)(&aux_tkd->lock); 3259 aux_tkd->shadow_timekeeper.clock_valid = false; 3260 timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); 3261 } 3262 3263 static DEFINE_MUTEX(aux_clock_mutex); 3264 3265 static ssize_t aux_clock_enable_store(struct kobject *kobj, struct kobj_attribute *attr, 3266 const char *buf, size_t count) 3267 { 3268 /* Lazy atoi() as name is "0..7" */ 3269 int id = kobj->name[0] & 0x7; 3270 bool enable; 3271 3272 if (!capable(CAP_SYS_TIME)) 3273 return -EPERM; 3274 3275 if (kstrtobool(buf, &enable) < 0) 3276 
return -EINVAL; 3277 3278 guard(mutex)(&aux_clock_mutex); 3279 if (enable == test_bit(id, &aux_timekeepers)) 3280 return count; 3281 3282 if (enable) { 3283 aux_clock_enable(CLOCK_AUX + id); 3284 set_bit(id, &aux_timekeepers); 3285 } else { 3286 aux_clock_disable(CLOCK_AUX + id); 3287 clear_bit(id, &aux_timekeepers); 3288 } 3289 return count; 3290 } 3291 3292 static ssize_t aux_clock_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) 3293 { 3294 unsigned long active = READ_ONCE(aux_timekeepers); 3295 /* Lazy atoi() as name is "0..7" */ 3296 int id = kobj->name[0] & 0x7; 3297 3298 return sysfs_emit(buf, "%d\n", test_bit(id, &active)); 3299 } 3300 3301 static struct kobj_attribute aux_clock_enable_attr = __ATTR_RW(aux_clock_enable); 3302 3303 static struct attribute *aux_clock_enable_attrs[] = { 3304 &aux_clock_enable_attr.attr, 3305 NULL 3306 }; 3307 3308 static const struct attribute_group aux_clock_enable_attr_group = { 3309 .attrs = aux_clock_enable_attrs, 3310 }; 3311 3312 static int __init tk_aux_sysfs_init(void) 3313 { 3314 struct kobject *auxo, *tko = kobject_create_and_add("time", kernel_kobj); 3315 int ret = -ENOMEM; 3316 3317 if (!tko) 3318 return ret; 3319 3320 auxo = kobject_create_and_add("aux_clocks", tko); 3321 if (!auxo) 3322 goto err_clean; 3323 3324 for (int i = 0; i < MAX_AUX_CLOCKS; i++) { 3325 char id[2] = { [0] = '0' + i, }; 3326 struct kobject *clk = kobject_create_and_add(id, auxo); 3327 3328 if (!clk) { 3329 ret = -ENOMEM; 3330 goto err_clean; 3331 } 3332 3333 ret = sysfs_create_group(clk, &aux_clock_enable_attr_group); 3334 if (ret) 3335 goto err_clean; 3336 } 3337 return 0; 3338 3339 err_clean: 3340 kobject_put(auxo); 3341 kobject_put(tko); 3342 return ret; 3343 } 3344 late_initcall(tk_aux_sysfs_init); 3345 3346 static __init void tk_aux_setup(void) 3347 { 3348 for (int i = TIMEKEEPER_AUX_FIRST; i <= TIMEKEEPER_AUX_LAST; i++) 3349 tkd_basic_setup(&timekeeper_data[i], i, false); 3350 } 3351 #endif /* 
CONFIG_POSIX_AUX_CLOCKS */ 3352