// SPDX-License-Identifier: GPL-2.0
/*
 * Kernel timekeeping code and accessor functions. Based on code from
 * timer.c, moved in commit 8524070b7982.
 */
#include <linux/timekeeper_internal.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/nmi.h>
#include <linux/sched.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/clock.h>
#include <linux/syscore_ops.h>
#include <linux/clocksource.h>
#include <linux/jiffies.h>
#include <linux/time.h>
#include <linux/timex.h>
#include <linux/tick.h>
#include <linux/stop_machine.h>
#include <linux/pvclock_gtod.h>
#include <linux/compiler.h>
#include <linux/audit.h>
#include <linux/random.h>

#include <vdso/auxclock.h>

#include "tick-internal.h"
#include "ntp_internal.h"
#include "timekeeping_internal.h"

/* Action flags for timekeeping_update_from_shadow() */
#define TK_CLEAR_NTP		(1 << 0)	/* reset the NTP error state */
#define TK_CLOCK_WAS_SET	(1 << 1)	/* clock was stepped; notify listeners */

#define TK_UPDATE_ALL		(TK_CLEAR_NTP | TK_CLOCK_WAS_SET)

enum timekeeping_adv_mode {
	/* Update timekeeper when a tick has passed */
	TK_ADV_TICK,

	/* Update timekeeper on a direct frequency change */
	TK_ADV_FREQ
};

/*
 * The most important data for readout fits into a single 64 byte
 * cache line.
 */
struct tk_data {
	seqcount_raw_spinlock_t	seq;		/* protects readers vs. updates */
	struct timekeeper	timekeeper;	/* the readout copy */
	struct timekeeper	shadow_timekeeper; /* update staging copy */
	raw_spinlock_t		lock;		/* serializes updaters */
} ____cacheline_aligned;

static struct tk_data timekeeper_data[TIMEKEEPERS_MAX];

/* The core timekeeper */
#define tk_core (timekeeper_data[TIMEKEEPER_CORE])

#ifdef CONFIG_POSIX_AUX_CLOCKS
/* Map an auxiliary timekeeper ID to its CLOCK_AUX clock id and read it */
static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts)
{
	return ktime_get_aux_ts64(CLOCK_AUX + tkid - TIMEKEEPER_AUX_FIRST, ts);
}
#else
static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts)
{
	return false;
}
#endif

/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;

/**
 * struct tk_fast - NMI safe timekeeper
 * @seq:	Sequence counter for protecting updates. The lowest bit
 *		is the index for the tk_read_base array
 * @base:	tk_read_base array. Access is indexed by the lowest bit of
 *		@seq.
 *
 * See @update_fast_timekeeper() below.
 */
struct tk_fast {
	seqcount_latch_t	seq;
	struct tk_read_base	base[2];
};

/* Suspend-time cycles value for halted fast timekeeper. */
static u64 cycles_at_suspend;

/*
 * Readout for the dummy boot/suspend clocksource: frozen snapshot while
 * suspended, otherwise local_clock() nanoseconds.
 */
static u64 dummy_clock_read(struct clocksource *cs)
{
	if (timekeeping_suspended)
		return cycles_at_suspend;
	return local_clock();
}

static struct clocksource dummy_clock = {
	.read = dummy_clock_read,
};

/*
 * Boot time initialization which allows local_clock() to be utilized
 * during early boot when clocksources are not available. local_clock()
 * returns nanoseconds already so no conversion is required, hence mult=1
 * and shift=0. When the first proper clocksource is installed then
 * the fast time keepers are updated with the correct values.
 */
#define FAST_TK_INIT					\
	{						\
		.clock		= &dummy_clock,		\
		.mask		= CLOCKSOURCE_MASK(64),	\
		.mult		= 1,			\
		.shift		= 0,			\
	}

static struct tk_fast tk_fast_mono ____cacheline_aligned = {
	.seq	 = SEQCNT_LATCH_ZERO(tk_fast_mono.seq),
	.base[0] = FAST_TK_INIT,
	.base[1] = FAST_TK_INIT,
};

static struct tk_fast tk_fast_raw ____cacheline_aligned = {
	.seq	 = SEQCNT_LATCH_ZERO(tk_fast_raw.seq),
	.base[0] = FAST_TK_INIT,
	.base[1] = FAST_TK_INIT,
};

#ifdef CONFIG_POSIX_AUX_CLOCKS
static __init void tk_aux_setup(void);
static void tk_aux_update_clocksource(void);
static void tk_aux_advance(void);
#else
static inline void tk_aux_setup(void) { }
static inline void tk_aux_update_clocksource(void) { }
static inline void tk_aux_advance(void) { }
#endif

/* Acquire tk_core.lock with interrupts disabled; returns the saved flags */
unsigned long timekeeper_lock_irqsave(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&tk_core.lock, flags);
	return flags;
}

/* Drop tk_core.lock and restore the flags from timekeeper_lock_irqsave() */
void timekeeper_unlock_irqrestore(unsigned long flags)
{
	raw_spin_unlock_irqrestore(&tk_core.lock, flags);
}

/*
 * Multigrain timestamps require tracking the latest fine-grained timestamp
 * that has been issued, and never returning a coarse-grained timestamp that is
 * earlier than that value.
 *
 * mg_floor represents the latest fine-grained time that has been handed out as
 * a file timestamp on the system. This is tracked as a monotonic ktime_t, and
 * converted to a realtime clock value on an as-needed basis.
 *
 * Maintaining mg_floor ensures the multigrain interfaces never issue a
 * timestamp earlier than one that has been previously issued.
 *
 * The exception to this rule is when there is a backward realtime clock jump. If
 * such an event occurs, a timestamp can appear to be earlier than a previous one.
 */
static __cacheline_aligned_in_smp atomic64_t mg_floor;

/* Fold whole seconds out of the shifted-nsec accumulators into the second counters */
static inline void tk_normalize_xtime(struct timekeeper *tk)
{
	while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
		tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
		tk->xtime_sec++;
	}
	while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) {
		tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
		tk->raw_sec++;
	}
}

/* CLOCK_REALTIME at the last accumulation point, as a timespec64 */
static inline struct timespec64 tk_xtime(const struct timekeeper *tk)
{
	struct timespec64 ts;

	ts.tv_sec = tk->xtime_sec;
	ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
	return ts;
}

/* Coarse CLOCK_REALTIME readout, built from the separate coarse_nsec copy */
static inline struct timespec64 tk_xtime_coarse(const struct timekeeper *tk)
{
	struct timespec64 ts;

	ts.tv_sec = tk->xtime_sec;
	ts.tv_nsec = tk->coarse_nsec;
	return ts;
}

/*
 * Update the nanoseconds part for the coarse time keepers. They can't rely
 * on xtime_nsec because xtime_nsec could be adjusted by a small negative
 * amount when the multiplication factor of the clock is adjusted, which
 * could cause the coarse clocks to go slightly backwards. See
 * timekeeping_apply_adjustment(). Thus we keep a separate copy for the coarse
 * clockids which only is updated when the clock has been set or we have
 * accumulated time.
 */
static inline void tk_update_coarse_nsecs(struct timekeeper *tk)
{
	tk->coarse_nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
}

/* Set CLOCK_REALTIME in @tk and keep the coarse copy in sync */
static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
{
	tk->xtime_sec = ts->tv_sec;
	tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
	tk_update_coarse_nsecs(tk);
}

/* Add @ts to CLOCK_REALTIME in @tk, normalize, and sync the coarse copy */
static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
{
	tk->xtime_sec += ts->tv_sec;
	tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
	tk_normalize_xtime(tk);
	tk_update_coarse_nsecs(tk);
}

/* Install a new wall_to_monotonic offset and refresh the derived ktime offsets */
static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
{
	struct timespec64 tmp;

	/*
	 * Verify consistency of: offset_real = -wall_to_monotonic
	 * before modifying anything
	 */
	set_normalized_timespec64(&tmp, -tk->wall_to_monotonic.tv_sec,
					-tk->wall_to_monotonic.tv_nsec);
	WARN_ON_ONCE(tk->offs_real != timespec64_to_ktime(tmp));
	tk->wall_to_monotonic = wtm;
	set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
	/* Paired with READ_ONCE() in ktime_mono_to_any() */
	WRITE_ONCE(tk->offs_real, timespec64_to_ktime(tmp));
	WRITE_ONCE(tk->offs_tai, ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0)));
}

/* Account time spent in suspend by advancing the boot clock offset by @delta */
static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
{
	/* Paired with READ_ONCE() in ktime_mono_to_any() */
	WRITE_ONCE(tk->offs_boot, ktime_add(tk->offs_boot, delta));
	/*
	 * Timespec representation for VDSO update to avoid 64bit division
	 * on every update.
	 */
	tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
}

/*
 * tk_clock_read - atomic clocksource read() helper
 *
 * This helper is necessary to use in the read paths because, while the
 * seqcount ensures we don't return a bad value while structures are updated,
 * it doesn't protect from potential crashes. There is the possibility that
 * the tkr's clocksource may change between the read reference, and the
 * clock reference passed to the read function. This can cause crashes if
 * the wrong clocksource is passed to the wrong read function.
 * This isn't necessary to use when holding the tk_core.lock or doing
 * a read of the fast-timekeeper tkrs (which is protected by its own locking
 * and update logic).
 */
static inline u64 tk_clock_read(const struct tk_read_base *tkr)
{
	struct clocksource *clock = READ_ONCE(tkr->clock);

	return clock->read(clock);
}

/**
 * tk_setup_internals - Set up internals to use clocksource clock.
 *
 * @tk:		The target timekeeper to setup.
 * @clock:		Pointer to clocksource.
 *
 * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
 * pair and interval request.
 *
 * Unless you're the timekeeping code, you should not be using this!
 */
static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
{
	u64 interval;
	u64 tmp, ntpinterval;
	struct clocksource *old_clock;

	/* Let snapshot/crosststamp users detect that the clocksource changed */
	++tk->cs_was_changed_seq;
	old_clock = tk->tkr_mono.clock;
	tk->tkr_mono.clock = clock;
	tk->tkr_mono.mask = clock->mask;
	tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);

	tk->tkr_raw.clock = clock;
	tk->tkr_raw.mask = clock->mask;
	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;

	/* Do the ns -> cycle conversion first, using original mult */
	tmp = NTP_INTERVAL_LENGTH;
	tmp <<= clock->shift;
	ntpinterval = tmp;
	/* Round to nearest on the division below */
	tmp += clock->mult/2;
	do_div(tmp, clock->mult);
	if (tmp == 0)
		tmp = 1;

	interval = (u64) tmp;
	tk->cycle_interval = interval;

	/* Go back from cycles -> shifted ns */
	tk->xtime_interval = interval * clock->mult;
	tk->xtime_remainder = ntpinterval - tk->xtime_interval;
	tk->raw_interval = interval * clock->mult;

	/* if changing clocks, convert xtime_nsec shift units */
	if (old_clock) {
		int shift_change = clock->shift - old_clock->shift;
		if (shift_change < 0) {
			tk->tkr_mono.xtime_nsec >>= -shift_change;
			tk->tkr_raw.xtime_nsec >>= -shift_change;
		} else {
			tk->tkr_mono.xtime_nsec <<= shift_change;
			tk->tkr_raw.xtime_nsec <<= shift_change;
		}
	}

	tk->tkr_mono.shift = clock->shift;
	tk->tkr_raw.shift = clock->shift;

	tk->ntp_error = 0;
	tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
	tk->ntp_tick = ntpinterval << tk->ntp_error_shift;

	/*
	 * The timekeeper keeps its own mult values for the currently
	 * active clocksource. These value will be adjusted via NTP
	 * to counteract clock drifting.
	 */
	tk->tkr_mono.mult = clock->mult;
	tk->tkr_raw.mult = clock->mult;
	tk->ntp_err_mult = 0;
	tk->skip_second_overflow = 0;
}

/* Timekeeper helper functions.
 */
/* Overflow-safe slow path: 64x32 multiply-add-shift without truncation */
static noinline u64 delta_to_ns_safe(const struct tk_read_base *tkr, u64 delta)
{
	return mul_u64_u32_add_u64_shr(delta, tkr->mult, tkr->xtime_nsec, tkr->shift);
}

/* Convert a raw cycle count into nanoseconds relative to the last update */
static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
{
	/* Calculate the delta since the last update_wall_time() */
	u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask;

	/*
	 * This detects both negative motion and the case where the delta
	 * overflows the multiplication with tkr->mult.
	 */
	if (unlikely(delta > tkr->clock->max_cycles)) {
		/*
		 * Handle clocksource inconsistency between CPUs to prevent
		 * time from going backwards by checking for the MSB of the
		 * mask being set in the delta.
		 */
		if (delta & ~(mask >> 1))
			return tkr->xtime_nsec >> tkr->shift;

		return delta_to_ns_safe(tkr, delta);
	}

	return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift;
}

/* Nanoseconds accumulated since the last update, reading the clocksource now */
static __always_inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
{
	return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr));
}

/**
 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
 * @tkr: Timekeeping readout base from which we take the update
 * @tkf: Pointer to NMI safe timekeeper
 *
 * We want to use this from any context including NMI and tracing /
 * instrumenting the timekeeping code itself.
 *
 * Employ the latch technique; see @write_seqcount_latch.
 *
 * So if a NMI hits the update of base[0] then it will use base[1]
 * which is still consistent. In the worst case this can result is a
 * slightly wrong timestamp (a few nanoseconds). See
 * @ktime_get_mono_fast_ns.
 */
static void update_fast_timekeeper(const struct tk_read_base *tkr,
				   struct tk_fast *tkf)
{
	struct tk_read_base *base = tkf->base;

	/* Force readers off to base[1] */
	write_seqcount_latch_begin(&tkf->seq);

	/* Update base[0] */
	memcpy(base, tkr, sizeof(*base));

	/* Force readers back to base[0] */
	write_seqcount_latch(&tkf->seq);

	/* Update base[1] */
	memcpy(base + 1, base, sizeof(*base));

	write_seqcount_latch_end(&tkf->seq);
}

/*
 * Latch-protected readout: the low bit of the sequence selects the stable
 * copy, then the current clocksource delta is added on top of its base.
 */
static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
{
	struct tk_read_base *tkr;
	unsigned int seq;
	u64 now;

	do {
		seq = read_seqcount_latch(&tkf->seq);
		tkr = tkf->base + (seq & 0x01);
		now = ktime_to_ns(tkr->base);
		now += timekeeping_get_ns(tkr);
	} while (read_seqcount_latch_retry(&tkf->seq, seq));

	return now;
}

/**
 * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
 *
 * This timestamp is not guaranteed to be monotonic across an update.
 * The timestamp is calculated by:
 *
 *	now = base_mono + clock_delta * slope
 *
 * So if the update lowers the slope, readers who are forced to the
 * not yet updated second array are still using the old steeper slope.
 *
 * tmono
 * ^
 * |    o  n
 * |   o n
 * |  u
 * | o
 * |o
 * |12345678---> reader order
 *
 * o = old slope
 * u = update
 * n = new slope
 *
 * So reader 6 will observe time going backwards versus reader 5.
 *
 * While other CPUs are likely to be able to observe that, the only way
 * for a CPU local observation is when an NMI hits in the middle of
 * the update. Timestamps taken from that NMI context might be ahead
 * of the following timestamps. Callers need to be aware of that and
 * deal with it.
 */
u64 notrace ktime_get_mono_fast_ns(void)
{
	return __ktime_get_fast_ns(&tk_fast_mono);
}
EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);

/**
 * ktime_get_raw_fast_ns - Fast NMI safe access to clock monotonic raw
 *
 * Contrary to ktime_get_mono_fast_ns() this is always correct because the
 * conversion factor is not affected by NTP/PTP correction.
 */
u64 notrace ktime_get_raw_fast_ns(void)
{
	return __ktime_get_fast_ns(&tk_fast_raw);
}
EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);

/**
 * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
 *
 * To keep it NMI safe since we're accessing from tracing, we're not using a
 * separate timekeeper with updates to monotonic clock and boot offset
 * protected with seqcounts. This has the following minor side effects:
 *
 * (1) Its possible that a timestamp be taken after the boot offset is updated
 * but before the timekeeper is updated. If this happens, the new boot offset
 * is added to the old timekeeping making the clock appear to update slightly
 * earlier:
 *    CPU 0					CPU 1
 *    timekeeping_inject_sleeptime64()
 *    __timekeeping_inject_sleeptime(tk, delta);
 *						timestamp();
 *    timekeeping_update_staged(tkd, TK_CLEAR_NTP...);
 *
 * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
 * partially updated. Since the tk->offs_boot update is a rare event, this
 * should be a rare occurrence which postprocessing should be able to handle.
 *
 * The caveats vs. timestamp ordering as documented for ktime_get_mono_fast_ns()
 * apply as well.
 */
u64 notrace ktime_get_boot_fast_ns(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	/* data_race(): offs_boot may be torn on 32-bit; see comment above */
	return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot)));
}
EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);

/**
 * ktime_get_tai_fast_ns - NMI safe and fast access to tai clock.
 *
 * The same limitations as described for ktime_get_boot_fast_ns() apply. The
 * mono time and the TAI offset are not read atomically which may yield wrong
 * readouts. However, an update of the TAI offset is an rare event e.g., caused
 * by settime or adjtimex with an offset. The user of this function has to deal
 * with the possibility of wrong timestamps in post processing.
 */
u64 notrace ktime_get_tai_fast_ns(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai)));
}
EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns);

/**
 * ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime.
 *
 * See ktime_get_mono_fast_ns() for documentation of the time stamp ordering.
 */
u64 ktime_get_real_fast_ns(void)
{
	struct tk_fast *tkf = &tk_fast_mono;
	struct tk_read_base *tkr;
	u64 baser, delta;
	unsigned int seq;

	do {
		seq = raw_read_seqcount_latch(&tkf->seq);
		tkr = tkf->base + (seq & 0x01);
		baser = ktime_to_ns(tkr->base_real);
		delta = timekeeping_get_ns(tkr);
	} while (raw_read_seqcount_latch_retry(&tkf->seq, seq));

	return baser + delta;
}
EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);

/**
 * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
 * @tk: Timekeeper to snapshot.
 *
 * It generally is unsafe to access the clocksource after timekeeping has been
 * suspended, so take a snapshot of the readout base of @tk and use it as the
 * fast timekeeper's readout base while suspended. It will return the same
 * number of cycles every time until timekeeping is resumed at which time the
 * proper readout base for the fast timekeeper will be restored automatically.
 */
static void halt_fast_timekeeper(const struct timekeeper *tk)
{
	static struct tk_read_base tkr_dummy;
	const struct tk_read_base *tkr = &tk->tkr_mono;

	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
	/* Freeze the cycle counter at the suspend snapshot */
	cycles_at_suspend = tk_clock_read(tkr);
	tkr_dummy.clock = &dummy_clock;
	tkr_dummy.base_real = tkr->base + tk->offs_real;
	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);

	tkr = &tk->tkr_raw;
	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
	tkr_dummy.clock = &dummy_clock;
	update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
}

static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);

/* Notify pvclock listeners about a timekeeper update; @was_set on clock step */
static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
{
	raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk);
}

/**
 * pvclock_gtod_register_notifier - register a pvclock timedata update listener
 * @nb: Pointer to the notifier block to register
 */
int pvclock_gtod_register_notifier(struct notifier_block *nb)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	int ret;

	guard(raw_spinlock_irqsave)(&tk_core.lock);
	ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
	/* Let the new listener pick up the current state immediately */
	update_pvclock_gtod(tk, true);

	return ret;
}
EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);

/**
 * pvclock_gtod_unregister_notifier - unregister a pvclock
 * timedata update listener
 * @nb: Pointer to the notifier block to unregister
 */
int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
{
	guard(raw_spinlock_irqsave)(&tk_core.lock);
	return raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
}
EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);

/*
 * tk_update_leap_state - helper to update the next_leap_ktime
 */
static inline void tk_update_leap_state(struct timekeeper *tk)
{
	tk->next_leap_ktime = ntp_get_next_leap(tk->id);
	if (tk->next_leap_ktime != KTIME_MAX)
		/* Convert to monotonic time */
		tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
}

/*
 * Leap state update for both shadow and the real timekeeper
 * Separate to spare a full memcpy() of the timekeeper.
 */
static void tk_update_leap_state_all(struct tk_data *tkd)
{
	write_seqcount_begin(&tkd->seq);
	tk_update_leap_state(&tkd->shadow_timekeeper);
	tkd->timekeeper.next_leap_ktime = tkd->shadow_timekeeper.next_leap_ktime;
	write_seqcount_end(&tkd->seq);
}

/*
 * Update the ktime_t based scalar nsec members of the timekeeper
 */
static inline void tk_update_ktime_data(struct timekeeper *tk)
{
	u64 seconds;
	u32 nsec;

	/*
	 * The xtime based monotonic readout is:
	 *	nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now();
	 * The ktime based monotonic readout is:
	 *	nsec = base_mono + now();
	 * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec
	 */
	seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
	nsec = (u32) tk->wall_to_monotonic.tv_nsec;
	tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);

	/*
	 * The sum of the nanoseconds portions of xtime and
	 * wall_to_monotonic can be greater/equal one second. Take
	 * this into account before updating tk->ktime_sec.
	 */
	nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
	if (nsec >= NSEC_PER_SEC)
		seconds++;
	tk->ktime_sec = seconds;

	/* Update the monotonic raw base */
	tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
}

/*
 * Restore the shadow timekeeper from the real timekeeper.
 */
static void timekeeping_restore_shadow(struct tk_data *tkd)
{
	lockdep_assert_held(&tkd->lock);
	memcpy(&tkd->shadow_timekeeper, &tkd->timekeeper, sizeof(tkd->timekeeper));
}

/*
 * Commit the staged shadow timekeeper to the real one and propagate the
 * update to VDSO, pvclock listeners and the fast timekeepers.
 */
static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int action)
{
	struct timekeeper *tk = &tkd->shadow_timekeeper;

	lockdep_assert_held(&tkd->lock);

	/*
	 * Block out readers before running the updates below because that
	 * updates VDSO and other time related infrastructure. Not blocking
	 * the readers might let a reader see time going backwards when
	 * reading from the VDSO after the VDSO update and then reading in
	 * the kernel from the timekeeper before that got updated.
	 */
	write_seqcount_begin(&tkd->seq);

	if (action & TK_CLEAR_NTP) {
		tk->ntp_error = 0;
		ntp_clear(tk->id);
	}

	tk_update_leap_state(tk);
	tk_update_ktime_data(tk);
	tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real;

	if (tk->id == TIMEKEEPER_CORE) {
		update_vsyscall(tk);
		update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);

		update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
		update_fast_timekeeper(&tk->tkr_raw,  &tk_fast_raw);
	}

	if (action & TK_CLOCK_WAS_SET)
		tk->clock_was_set_seq++;

	/*
	 * Update the real timekeeper.
	 *
	 * We could avoid this memcpy() by switching pointers, but that has
	 * the downside that the reader side does not longer benefit from
	 * the cacheline optimized data layout of the timekeeper and requires
	 * another indirection.
	 */
	memcpy(&tkd->timekeeper, tk, sizeof(*tk));
	write_seqcount_end(&tkd->seq);
}

/**
 * timekeeping_forward_now - update clock to the current time
 * @tk:		Pointer to the timekeeper to update
 *
 * Forward the current clock to update its state since the last call to
 * update_wall_time(). This is useful before significant clock changes,
 * as it avoids having to deal with this time offset explicitly.
 */
static void timekeeping_forward_now(struct timekeeper *tk)
{
	u64 cycle_now, delta;

	cycle_now = tk_clock_read(&tk->tkr_mono);
	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
				  tk->tkr_mono.clock->max_raw_delta);
	tk->tkr_mono.cycle_last = cycle_now;
	tk->tkr_raw.cycle_last = cycle_now;

	/* Accumulate in max_cycles sized chunks to avoid multiply overflow */
	while (delta > 0) {
		u64 max = tk->tkr_mono.clock->max_cycles;
		u64 incr = delta < max ? delta : max;

		tk->tkr_mono.xtime_nsec += incr * tk->tkr_mono.mult;
		tk->tkr_raw.xtime_nsec += incr * tk->tkr_raw.mult;
		tk_normalize_xtime(tk);
		delta -= incr;
	}
	tk_update_coarse_nsecs(tk);
}

/**
 * ktime_get_real_ts64 - Returns the time of day in a timespec64.
 * @ts:		pointer to the timespec to be set
 *
 * Returns the time of day in a timespec64 (WARN if suspended).
 */
void ktime_get_real_ts64(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	u64 nsecs;

	WARN_ON(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);

		ts->tv_sec = tk->xtime_sec;
		nsecs = timekeeping_get_ns(&tk->tkr_mono);

	} while (read_seqcount_retry(&tk_core.seq, seq));

	ts->tv_nsec = 0;
	timespec64_add_ns(ts, nsecs);
}
EXPORT_SYMBOL(ktime_get_real_ts64);

/* Read CLOCK_MONOTONIC in ktime_t format (WARN if suspended) */
ktime_t ktime_get(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	ktime_t base;
	u64 nsecs;

	WARN_ON(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		base = tk->tkr_mono.base;
		nsecs = timekeeping_get_ns(&tk->tkr_mono);

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ktime_add_ns(base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get);

/* Resolution of the current clocksource in nanoseconds per cycle */
u32 ktime_get_resolution_ns(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	u32 nsecs;

	WARN_ON(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		nsecs = tk->tkr_mono.mult >> tk->tkr_mono.shift;
	} while (read_seqcount_retry(&tk_core.seq, seq));

	return nsecs;
}
EXPORT_SYMBOL_GPL(ktime_get_resolution_ns);

/* Offsets from CLOCK_MONOTONIC to the other clock bases, indexed by tk_offsets */
static ktime_t *offsets[TK_OFFS_MAX] = {
	[TK_OFFS_REAL]	= &tk_core.timekeeper.offs_real,
	[TK_OFFS_BOOT]	= &tk_core.timekeeper.offs_boot,
	[TK_OFFS_TAI]	= &tk_core.timekeeper.offs_tai,
};

/* CLOCK_MONOTONIC plus the selected base offset (WARN if suspended) */
ktime_t ktime_get_with_offset(enum tk_offsets offs)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	ktime_t base, *offset = offsets[offs];
	u64 nsecs;

	WARN_ON(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		base = ktime_add(tk->tkr_mono.base, *offset);
		nsecs = timekeeping_get_ns(&tk->tkr_mono);

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ktime_add_ns(base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get_with_offset);

/* Coarse variant: uses the accumulated coarse_nsec instead of reading the clock */
ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	ktime_t base, *offset = offsets[offs];
	unsigned int seq;
	u64 nsecs;

	WARN_ON(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		base = ktime_add(tk->tkr_mono.base, *offset);
		nsecs = tk->coarse_nsec;

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ktime_add_ns(base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);

/**
 * ktime_mono_to_any() - convert monotonic time to any other time
 * @tmono:	time to convert.
 * @offs:	which offset to use
 */
ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
{
	ktime_t *offset = offsets[offs];
	unsigned int seq;
	ktime_t tconv;

	if (IS_ENABLED(CONFIG_64BIT)) {
		/*
		 * Paired with WRITE_ONCE()s in tk_set_wall_to_mono() and
		 * tk_update_sleep_time().
		 */
		return ktime_add(tmono, READ_ONCE(*offset));
	}

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		tconv = ktime_add(tmono, *offset);
	} while (read_seqcount_retry(&tk_core.seq, seq));

	return tconv;
}
EXPORT_SYMBOL_GPL(ktime_mono_to_any);

/**
 * ktime_get_raw - Returns the raw monotonic time in ktime_t format
 */
ktime_t ktime_get_raw(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	ktime_t base;
	u64 nsecs;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		base = tk->tkr_raw.base;
		nsecs = timekeeping_get_ns(&tk->tkr_raw);

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ktime_add_ns(base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get_raw);

/**
 * ktime_get_ts64 - get the monotonic clock in timespec64 format
 * @ts:		pointer to timespec variable
 *
 * The function calculates the monotonic clock from the realtime
 * clock and the wall_to_monotonic offset and stores the result
 * in normalized timespec64 format in the variable pointed to by @ts.
 */
void ktime_get_ts64(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	struct timespec64 tomono;
	unsigned int seq;
	u64 nsec;

	WARN_ON(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		ts->tv_sec = tk->xtime_sec;
		nsec = timekeeping_get_ns(&tk->tkr_mono);
		tomono = tk->wall_to_monotonic;

	} while (read_seqcount_retry(&tk_core.seq, seq));

	ts->tv_sec += tomono.tv_sec;
	ts->tv_nsec = 0;
	timespec64_add_ns(ts, nsec + tomono.tv_nsec);
}
EXPORT_SYMBOL_GPL(ktime_get_ts64);

/**
 * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC
 *
 * Returns the seconds portion of CLOCK_MONOTONIC with a single non
 * serialized read. tk->ktime_sec is of type 'unsigned long' so this
 * works on both 32 and 64 bit systems. On 32 bit systems the readout
 * covers ~136 years of uptime which should be enough to prevent
 * premature wrap arounds.
 */
time64_t ktime_get_seconds(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	WARN_ON(timekeeping_suspended);
	return tk->ktime_sec;
}
EXPORT_SYMBOL_GPL(ktime_get_seconds);

/**
 * ktime_get_real_seconds - Get the seconds portion of CLOCK_REALTIME
 *
 * Returns the wall clock seconds since 1970.
 *
 * For 64bit systems the fast access to tk->xtime_sec is preserved. On
 * 32bit systems the access must be protected with the sequence
 * counter to provide "atomic" access to the 64bit tk->xtime_sec
 * value.
 */
time64_t ktime_get_real_seconds(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	time64_t seconds;
	unsigned int seq;

	/* 64-bit loads of xtime_sec are atomic; no seqcount needed */
	if (IS_ENABLED(CONFIG_64BIT))
		return tk->xtime_sec;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		seconds = tk->xtime_sec;

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return seconds;
}
EXPORT_SYMBOL_GPL(ktime_get_real_seconds);

/**
 * __ktime_get_real_seconds - Unprotected access to CLOCK_REALTIME seconds
 *
 * The same as ktime_get_real_seconds() but without the sequence counter
 * protection. This function is used in restricted contexts like the x86 MCE
 * handler and in KGDB. It's unprotected on 32-bit vs. concurrent half
 * completed modification and only to be used for such critical contexts.
 *
 * Returns: Racy snapshot of the CLOCK_REALTIME seconds value
 */
noinstr time64_t __ktime_get_real_seconds(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	return tk->xtime_sec;
}

/**
 * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter
 * @systime_snapshot:	pointer to struct receiving the system time snapshot
 */
void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	ktime_t base_raw;
	ktime_t base_real;
	ktime_t base_boot;
	u64 nsec_raw;
	u64 nsec_real;
	u64 now;

	WARN_ON_ONCE(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		/* Single clocksource read feeds both the real and raw deltas */
		now = tk_clock_read(&tk->tkr_mono);
		systime_snapshot->cs_id = tk->tkr_mono.clock->id;
		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
		base_real = ktime_add(tk->tkr_mono.base,
				      tk_core.timekeeper.offs_real);
		base_boot = ktime_add(tk->tkr_mono.base,
				      tk_core.timekeeper.offs_boot);
		base_raw = tk->tkr_raw.base;
		nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
		nsec_raw  = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
	} while (read_seqcount_retry(&tk_core.seq, seq));

	systime_snapshot->cycles = now;
	systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
	systime_snapshot->boot = ktime_add_ns(base_boot, nsec_real);
	systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
}
EXPORT_SYMBOL_GPL(ktime_get_snapshot);

/* Scale base by mult/div checking for overflow */
static int scale64_check_overflow(u64 mult, u64 div, u64 *base)
{
	u64 tmp, rem;

	tmp = div64_u64_rem(*base, div, &rem);

	/* Refuse if either quotient or remainder would overflow when scaled */
	if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) ||
	    ((int)sizeof(u64)*8 - fls64(mult) < fls64(rem)))
		return -EOVERFLOW;
	tmp *= mult;

	rem = div64_u64(rem * mult, div);
	*base = tmp + rem;
	return 0;
}

/**
 * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval
 * @history:			Snapshot representing start of history
 * @partial_history_cycles:	Cycle offset into history (fractional part)
 * @total_history_cycles:	Total history length in cycles
 * @discontinuity:		True indicates clock was set on history period
 * @ts:				Cross timestamp that should be adjusted using
 *				partial/total ratio
 *
 * Helper function used by get_device_system_crosststamp() to correct the
 * crosstimestamp corresponding to the start of the current interval to the
 * system counter value (timestamp point) provided by the driver. The
 * total_history_* quantities are the total history starting at the provided
 * reference point and ending at the start of the current interval. The cycle
 * count between the driver timestamp point and the start of the current
 * interval is partial_history_cycles.
1096 */ 1097 static int adjust_historical_crosststamp(struct system_time_snapshot *history, 1098 u64 partial_history_cycles, 1099 u64 total_history_cycles, 1100 bool discontinuity, 1101 struct system_device_crosststamp *ts) 1102 { 1103 struct timekeeper *tk = &tk_core.timekeeper; 1104 u64 corr_raw, corr_real; 1105 bool interp_forward; 1106 int ret; 1107 1108 if (total_history_cycles == 0 || partial_history_cycles == 0) 1109 return 0; 1110 1111 /* Interpolate shortest distance from beginning or end of history */ 1112 interp_forward = partial_history_cycles > total_history_cycles / 2; 1113 partial_history_cycles = interp_forward ? 1114 total_history_cycles - partial_history_cycles : 1115 partial_history_cycles; 1116 1117 /* 1118 * Scale the monotonic raw time delta by: 1119 * partial_history_cycles / total_history_cycles 1120 */ 1121 corr_raw = (u64)ktime_to_ns( 1122 ktime_sub(ts->sys_monoraw, history->raw)); 1123 ret = scale64_check_overflow(partial_history_cycles, 1124 total_history_cycles, &corr_raw); 1125 if (ret) 1126 return ret; 1127 1128 /* 1129 * If there is a discontinuity in the history, scale monotonic raw 1130 * correction by: 1131 * mult(real)/mult(raw) yielding the realtime correction 1132 * Otherwise, calculate the realtime correction similar to monotonic 1133 * raw calculation 1134 */ 1135 if (discontinuity) { 1136 corr_real = mul_u64_u32_div 1137 (corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult); 1138 } else { 1139 corr_real = (u64)ktime_to_ns( 1140 ktime_sub(ts->sys_realtime, history->real)); 1141 ret = scale64_check_overflow(partial_history_cycles, 1142 total_history_cycles, &corr_real); 1143 if (ret) 1144 return ret; 1145 } 1146 1147 /* Fixup monotonic raw and real time time values */ 1148 if (interp_forward) { 1149 ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw); 1150 ts->sys_realtime = ktime_add_ns(history->real, corr_real); 1151 } else { 1152 ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw); 1153 ts->sys_realtime = 
ktime_sub_ns(ts->sys_realtime, corr_real); 1154 } 1155 1156 return 0; 1157 } 1158 1159 /* 1160 * timestamp_in_interval - true if ts is chronologically in [start, end] 1161 * 1162 * True if ts occurs chronologically at or after start, and before or at end. 1163 */ 1164 static bool timestamp_in_interval(u64 start, u64 end, u64 ts) 1165 { 1166 if (ts >= start && ts <= end) 1167 return true; 1168 if (start > end && (ts >= start || ts <= end)) 1169 return true; 1170 return false; 1171 } 1172 1173 static bool convert_clock(u64 *val, u32 numerator, u32 denominator) 1174 { 1175 u64 rem, res; 1176 1177 if (!numerator || !denominator) 1178 return false; 1179 1180 res = div64_u64_rem(*val, denominator, &rem) * numerator; 1181 *val = res + div_u64(rem * numerator, denominator); 1182 return true; 1183 } 1184 1185 static bool convert_base_to_cs(struct system_counterval_t *scv) 1186 { 1187 struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock; 1188 struct clocksource_base *base; 1189 u32 num, den; 1190 1191 /* The timestamp was taken from the time keeper clock source */ 1192 if (cs->id == scv->cs_id) 1193 return true; 1194 1195 /* 1196 * Check whether cs_id matches the base clock. Prevent the compiler from 1197 * re-evaluating @base as the clocksource might change concurrently. 1198 */ 1199 base = READ_ONCE(cs->base); 1200 if (!base || base->id != scv->cs_id) 1201 return false; 1202 1203 num = scv->use_nsecs ? cs->freq_khz : base->numerator; 1204 den = scv->use_nsecs ? USEC_PER_SEC : base->denominator; 1205 1206 if (!convert_clock(&scv->cycles, num, den)) 1207 return false; 1208 1209 scv->cycles += base->offset; 1210 return true; 1211 } 1212 1213 static bool convert_cs_to_base(u64 *cycles, enum clocksource_ids base_id) 1214 { 1215 struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock; 1216 struct clocksource_base *base; 1217 1218 /* 1219 * Check whether base_id matches the base clock. 
Prevent the compiler from 1220 * re-evaluating @base as the clocksource might change concurrently. 1221 */ 1222 base = READ_ONCE(cs->base); 1223 if (!base || base->id != base_id) 1224 return false; 1225 1226 *cycles -= base->offset; 1227 if (!convert_clock(cycles, base->denominator, base->numerator)) 1228 return false; 1229 return true; 1230 } 1231 1232 static bool convert_ns_to_cs(u64 *delta) 1233 { 1234 struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono; 1235 1236 if (BITS_TO_BYTES(fls64(*delta) + tkr->shift) >= sizeof(*delta)) 1237 return false; 1238 1239 *delta = div_u64((*delta << tkr->shift) - tkr->xtime_nsec, tkr->mult); 1240 return true; 1241 } 1242 1243 /** 1244 * ktime_real_to_base_clock() - Convert CLOCK_REALTIME timestamp to a base clock timestamp 1245 * @treal: CLOCK_REALTIME timestamp to convert 1246 * @base_id: base clocksource id 1247 * @cycles: pointer to store the converted base clock timestamp 1248 * 1249 * Converts a supplied, future realtime clock value to the corresponding base clock value. 1250 * 1251 * Return: true if the conversion is successful, false otherwise. 
1252 */ 1253 bool ktime_real_to_base_clock(ktime_t treal, enum clocksource_ids base_id, u64 *cycles) 1254 { 1255 struct timekeeper *tk = &tk_core.timekeeper; 1256 unsigned int seq; 1257 u64 delta; 1258 1259 do { 1260 seq = read_seqcount_begin(&tk_core.seq); 1261 if ((u64)treal < tk->tkr_mono.base_real) 1262 return false; 1263 delta = (u64)treal - tk->tkr_mono.base_real; 1264 if (!convert_ns_to_cs(&delta)) 1265 return false; 1266 *cycles = tk->tkr_mono.cycle_last + delta; 1267 if (!convert_cs_to_base(cycles, base_id)) 1268 return false; 1269 } while (read_seqcount_retry(&tk_core.seq, seq)); 1270 1271 return true; 1272 } 1273 EXPORT_SYMBOL_GPL(ktime_real_to_base_clock); 1274 1275 /** 1276 * get_device_system_crosststamp - Synchronously capture system/device timestamp 1277 * @get_time_fn: Callback to get simultaneous device time and 1278 * system counter from the device driver 1279 * @ctx: Context passed to get_time_fn() 1280 * @history_begin: Historical reference point used to interpolate system 1281 * time when counter provided by the driver is before the current interval 1282 * @xtstamp: Receives simultaneously captured system and device time 1283 * 1284 * Reads a timestamp from a device and correlates it to system time 1285 */ 1286 int get_device_system_crosststamp(int (*get_time_fn) 1287 (ktime_t *device_time, 1288 struct system_counterval_t *sys_counterval, 1289 void *ctx), 1290 void *ctx, 1291 struct system_time_snapshot *history_begin, 1292 struct system_device_crosststamp *xtstamp) 1293 { 1294 struct system_counterval_t system_counterval; 1295 struct timekeeper *tk = &tk_core.timekeeper; 1296 u64 cycles, now, interval_start; 1297 unsigned int clock_was_set_seq = 0; 1298 ktime_t base_real, base_raw; 1299 u64 nsec_real, nsec_raw; 1300 u8 cs_was_changed_seq; 1301 unsigned int seq; 1302 bool do_interp; 1303 int ret; 1304 1305 do { 1306 seq = read_seqcount_begin(&tk_core.seq); 1307 /* 1308 * Try to synchronously capture device time and a system 1309 * counter 
value calling back into the device driver 1310 */ 1311 ret = get_time_fn(&xtstamp->device, &system_counterval, ctx); 1312 if (ret) 1313 return ret; 1314 1315 /* 1316 * Verify that the clocksource ID associated with the captured 1317 * system counter value is the same as for the currently 1318 * installed timekeeper clocksource 1319 */ 1320 if (system_counterval.cs_id == CSID_GENERIC || 1321 !convert_base_to_cs(&system_counterval)) 1322 return -ENODEV; 1323 cycles = system_counterval.cycles; 1324 1325 /* 1326 * Check whether the system counter value provided by the 1327 * device driver is on the current timekeeping interval. 1328 */ 1329 now = tk_clock_read(&tk->tkr_mono); 1330 interval_start = tk->tkr_mono.cycle_last; 1331 if (!timestamp_in_interval(interval_start, now, cycles)) { 1332 clock_was_set_seq = tk->clock_was_set_seq; 1333 cs_was_changed_seq = tk->cs_was_changed_seq; 1334 cycles = interval_start; 1335 do_interp = true; 1336 } else { 1337 do_interp = false; 1338 } 1339 1340 base_real = ktime_add(tk->tkr_mono.base, 1341 tk_core.timekeeper.offs_real); 1342 base_raw = tk->tkr_raw.base; 1343 1344 nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles); 1345 nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, cycles); 1346 } while (read_seqcount_retry(&tk_core.seq, seq)); 1347 1348 xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real); 1349 xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw); 1350 1351 /* 1352 * Interpolate if necessary, adjusting back from the start of the 1353 * current interval 1354 */ 1355 if (do_interp) { 1356 u64 partial_history_cycles, total_history_cycles; 1357 bool discontinuity; 1358 1359 /* 1360 * Check that the counter value is not before the provided 1361 * history reference and that the history doesn't cross a 1362 * clocksource change 1363 */ 1364 if (!history_begin || 1365 !timestamp_in_interval(history_begin->cycles, 1366 cycles, system_counterval.cycles) || 1367 history_begin->cs_was_changed_seq != 
cs_was_changed_seq) 1368 return -EINVAL; 1369 partial_history_cycles = cycles - system_counterval.cycles; 1370 total_history_cycles = cycles - history_begin->cycles; 1371 discontinuity = 1372 history_begin->clock_was_set_seq != clock_was_set_seq; 1373 1374 ret = adjust_historical_crosststamp(history_begin, 1375 partial_history_cycles, 1376 total_history_cycles, 1377 discontinuity, xtstamp); 1378 if (ret) 1379 return ret; 1380 } 1381 1382 return 0; 1383 } 1384 EXPORT_SYMBOL_GPL(get_device_system_crosststamp); 1385 1386 /** 1387 * timekeeping_clocksource_has_base - Check whether the current clocksource 1388 * is based on given a base clock 1389 * @id: base clocksource ID 1390 * 1391 * Note: The return value is a snapshot which can become invalid right 1392 * after the function returns. 1393 * 1394 * Return: true if the timekeeper clocksource has a base clock with @id, 1395 * false otherwise 1396 */ 1397 bool timekeeping_clocksource_has_base(enum clocksource_ids id) 1398 { 1399 /* 1400 * This is a snapshot, so no point in using the sequence 1401 * count. Just prevent the compiler from re-evaluating @base as the 1402 * clocksource might change concurrently. 1403 */ 1404 struct clocksource_base *base = READ_ONCE(tk_core.timekeeper.tkr_mono.clock->base); 1405 1406 return base ? base->id == id : false; 1407 } 1408 EXPORT_SYMBOL_GPL(timekeeping_clocksource_has_base); 1409 1410 /** 1411 * do_settimeofday64 - Sets the time of day. 
1412 * @ts: pointer to the timespec64 variable containing the new time 1413 * 1414 * Sets the time of day to the new time and update NTP and notify hrtimers 1415 */ 1416 int do_settimeofday64(const struct timespec64 *ts) 1417 { 1418 struct timespec64 ts_delta, xt; 1419 1420 if (!timespec64_valid_settod(ts)) 1421 return -EINVAL; 1422 1423 scoped_guard (raw_spinlock_irqsave, &tk_core.lock) { 1424 struct timekeeper *tks = &tk_core.shadow_timekeeper; 1425 1426 timekeeping_forward_now(tks); 1427 1428 xt = tk_xtime(tks); 1429 ts_delta = timespec64_sub(*ts, xt); 1430 1431 if (timespec64_compare(&tks->wall_to_monotonic, &ts_delta) > 0) { 1432 timekeeping_restore_shadow(&tk_core); 1433 return -EINVAL; 1434 } 1435 1436 tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, ts_delta)); 1437 tk_set_xtime(tks, ts); 1438 timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL); 1439 } 1440 1441 /* Signal hrtimers about time change */ 1442 clock_was_set(CLOCK_SET_WALL); 1443 1444 audit_tk_injoffset(ts_delta); 1445 add_device_randomness(ts, sizeof(*ts)); 1446 return 0; 1447 } 1448 EXPORT_SYMBOL(do_settimeofday64); 1449 1450 static inline bool timekeeper_is_core_tk(struct timekeeper *tk) 1451 { 1452 return !IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS) || tk->id == TIMEKEEPER_CORE; 1453 } 1454 1455 /** 1456 * __timekeeping_inject_offset - Adds or subtracts from the current time. 1457 * @tkd: Pointer to the timekeeper to modify 1458 * @ts: Pointer to the timespec variable containing the offset 1459 * 1460 * Adds or subtracts an offset value from the current time. 
1461 */ 1462 static int __timekeeping_inject_offset(struct tk_data *tkd, const struct timespec64 *ts) 1463 { 1464 struct timekeeper *tks = &tkd->shadow_timekeeper; 1465 struct timespec64 tmp; 1466 1467 if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC) 1468 return -EINVAL; 1469 1470 timekeeping_forward_now(tks); 1471 1472 if (timekeeper_is_core_tk(tks)) { 1473 /* Make sure the proposed value is valid */ 1474 tmp = timespec64_add(tk_xtime(tks), *ts); 1475 if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 || 1476 !timespec64_valid_settod(&tmp)) { 1477 timekeeping_restore_shadow(tkd); 1478 return -EINVAL; 1479 } 1480 1481 tk_xtime_add(tks, ts); 1482 tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, *ts)); 1483 } else { 1484 struct tk_read_base *tkr_mono = &tks->tkr_mono; 1485 ktime_t now, offs; 1486 1487 /* Get the current time */ 1488 now = ktime_add_ns(tkr_mono->base, timekeeping_get_ns(tkr_mono)); 1489 /* Add the relative offset change */ 1490 offs = ktime_add(tks->offs_aux, timespec64_to_ktime(*ts)); 1491 1492 /* Prevent that the resulting time becomes negative */ 1493 if (ktime_add(now, offs) < 0) { 1494 timekeeping_restore_shadow(tkd); 1495 return -EINVAL; 1496 } 1497 tks->offs_aux = offs; 1498 } 1499 1500 timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL); 1501 return 0; 1502 } 1503 1504 static int timekeeping_inject_offset(const struct timespec64 *ts) 1505 { 1506 int ret; 1507 1508 scoped_guard (raw_spinlock_irqsave, &tk_core.lock) 1509 ret = __timekeeping_inject_offset(&tk_core, ts); 1510 1511 /* Signal hrtimers about time change */ 1512 if (!ret) 1513 clock_was_set(CLOCK_SET_WALL); 1514 return ret; 1515 } 1516 1517 /* 1518 * Indicates if there is an offset between the system clock and the hardware 1519 * clock/persistent clock/rtc. 1520 */ 1521 int persistent_clock_is_local; 1522 1523 /* 1524 * Adjust the time obtained from the CMOS to be UTC time instead of 1525 * local time. 
1526 * 1527 * This is ugly, but preferable to the alternatives. Otherwise we 1528 * would either need to write a program to do it in /etc/rc (and risk 1529 * confusion if the program gets run more than once; it would also be 1530 * hard to make the program warp the clock precisely n hours) or 1531 * compile in the timezone information into the kernel. Bad, bad.... 1532 * 1533 * - TYT, 1992-01-01 1534 * 1535 * The best thing to do is to keep the CMOS clock in universal time (UTC) 1536 * as real UNIX machines always do it. This avoids all headaches about 1537 * daylight saving times and warping kernel clocks. 1538 */ 1539 void timekeeping_warp_clock(void) 1540 { 1541 if (sys_tz.tz_minuteswest != 0) { 1542 struct timespec64 adjust; 1543 1544 persistent_clock_is_local = 1; 1545 adjust.tv_sec = sys_tz.tz_minuteswest * 60; 1546 adjust.tv_nsec = 0; 1547 timekeeping_inject_offset(&adjust); 1548 } 1549 } 1550 1551 /* 1552 * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic 1553 */ 1554 static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) 1555 { 1556 tk->tai_offset = tai_offset; 1557 tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0)); 1558 } 1559 1560 /* 1561 * change_clocksource - Swaps clocksources if a new one is available 1562 * 1563 * Accumulates current time interval and initializes new clocksource 1564 */ 1565 static int change_clocksource(void *data) 1566 { 1567 struct clocksource *new = data, *old = NULL; 1568 1569 /* 1570 * If the clocksource is in a module, get a module reference. 1571 * Succeeds for built-in code (owner == NULL) as well. Abort if the 1572 * reference can't be acquired. 
1573 */ 1574 if (!try_module_get(new->owner)) 1575 return 0; 1576 1577 /* Abort if the device can't be enabled */ 1578 if (new->enable && new->enable(new) != 0) { 1579 module_put(new->owner); 1580 return 0; 1581 } 1582 1583 scoped_guard (raw_spinlock_irqsave, &tk_core.lock) { 1584 struct timekeeper *tks = &tk_core.shadow_timekeeper; 1585 1586 timekeeping_forward_now(tks); 1587 old = tks->tkr_mono.clock; 1588 tk_setup_internals(tks, new); 1589 timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL); 1590 } 1591 1592 tk_aux_update_clocksource(); 1593 1594 if (old) { 1595 if (old->disable) 1596 old->disable(old); 1597 module_put(old->owner); 1598 } 1599 1600 return 0; 1601 } 1602 1603 /** 1604 * timekeeping_notify - Install a new clock source 1605 * @clock: pointer to the clock source 1606 * 1607 * This function is called from clocksource.c after a new, better clock 1608 * source has been registered. The caller holds the clocksource_mutex. 1609 */ 1610 int timekeeping_notify(struct clocksource *clock) 1611 { 1612 struct timekeeper *tk = &tk_core.timekeeper; 1613 1614 if (tk->tkr_mono.clock == clock) 1615 return 0; 1616 stop_machine(change_clocksource, clock, NULL); 1617 tick_clock_notify(); 1618 return tk->tkr_mono.clock == clock ? 
0 : -1; 1619 } 1620 1621 /** 1622 * ktime_get_raw_ts64 - Returns the raw monotonic time in a timespec 1623 * @ts: pointer to the timespec64 to be set 1624 * 1625 * Returns the raw monotonic time (completely un-modified by ntp) 1626 */ 1627 void ktime_get_raw_ts64(struct timespec64 *ts) 1628 { 1629 struct timekeeper *tk = &tk_core.timekeeper; 1630 unsigned int seq; 1631 u64 nsecs; 1632 1633 do { 1634 seq = read_seqcount_begin(&tk_core.seq); 1635 ts->tv_sec = tk->raw_sec; 1636 nsecs = timekeeping_get_ns(&tk->tkr_raw); 1637 1638 } while (read_seqcount_retry(&tk_core.seq, seq)); 1639 1640 ts->tv_nsec = 0; 1641 timespec64_add_ns(ts, nsecs); 1642 } 1643 EXPORT_SYMBOL(ktime_get_raw_ts64); 1644 1645 /** 1646 * ktime_get_clock_ts64 - Returns time of a clock in a timespec 1647 * @id: POSIX clock ID of the clock to read 1648 * @ts: Pointer to the timespec64 to be set 1649 * 1650 * The timestamp is invalidated (@ts->sec is set to -1) if the 1651 * clock @id is not available. 1652 */ 1653 void ktime_get_clock_ts64(clockid_t id, struct timespec64 *ts) 1654 { 1655 /* Invalidate time stamp */ 1656 ts->tv_sec = -1; 1657 ts->tv_nsec = 0; 1658 1659 switch (id) { 1660 case CLOCK_REALTIME: 1661 ktime_get_real_ts64(ts); 1662 return; 1663 case CLOCK_MONOTONIC: 1664 ktime_get_ts64(ts); 1665 return; 1666 case CLOCK_MONOTONIC_RAW: 1667 ktime_get_raw_ts64(ts); 1668 return; 1669 case CLOCK_AUX ... 
CLOCK_AUX_LAST: 1670 if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) 1671 ktime_get_aux_ts64(id, ts); 1672 return; 1673 default: 1674 WARN_ON_ONCE(1); 1675 } 1676 } 1677 EXPORT_SYMBOL_GPL(ktime_get_clock_ts64); 1678 1679 /** 1680 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres 1681 */ 1682 int timekeeping_valid_for_hres(void) 1683 { 1684 struct timekeeper *tk = &tk_core.timekeeper; 1685 unsigned int seq; 1686 int ret; 1687 1688 do { 1689 seq = read_seqcount_begin(&tk_core.seq); 1690 1691 ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; 1692 1693 } while (read_seqcount_retry(&tk_core.seq, seq)); 1694 1695 return ret; 1696 } 1697 1698 /** 1699 * timekeeping_max_deferment - Returns max time the clocksource can be deferred 1700 */ 1701 u64 timekeeping_max_deferment(void) 1702 { 1703 struct timekeeper *tk = &tk_core.timekeeper; 1704 unsigned int seq; 1705 u64 ret; 1706 1707 do { 1708 seq = read_seqcount_begin(&tk_core.seq); 1709 1710 ret = tk->tkr_mono.clock->max_idle_ns; 1711 1712 } while (read_seqcount_retry(&tk_core.seq, seq)); 1713 1714 return ret; 1715 } 1716 1717 /** 1718 * read_persistent_clock64 - Return time from the persistent clock. 1719 * @ts: Pointer to the storage for the readout value 1720 * 1721 * Weak dummy function for arches that do not yet support it. 1722 * Reads the time from the battery backed persistent clock. 1723 * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. 1724 * 1725 * XXX - Do be sure to remove it once all arches implement it. 1726 */ 1727 void __weak read_persistent_clock64(struct timespec64 *ts) 1728 { 1729 ts->tv_sec = 0; 1730 ts->tv_nsec = 0; 1731 } 1732 1733 /** 1734 * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset 1735 * from the boot. 1736 * @wall_time: current time as returned by persistent clock 1737 * @boot_offset: offset that is defined as wall_time - boot_time 1738 * 1739 * Weak dummy function for arches that do not yet support it. 
1740 * 1741 * The default function calculates offset based on the current value of 1742 * local_clock(). This way architectures that support sched_clock() but don't 1743 * support dedicated boot time clock will provide the best estimate of the 1744 * boot time. 1745 */ 1746 void __weak __init 1747 read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, 1748 struct timespec64 *boot_offset) 1749 { 1750 read_persistent_clock64(wall_time); 1751 *boot_offset = ns_to_timespec64(local_clock()); 1752 } 1753 1754 static __init void tkd_basic_setup(struct tk_data *tkd, enum timekeeper_ids tk_id, bool valid) 1755 { 1756 raw_spin_lock_init(&tkd->lock); 1757 seqcount_raw_spinlock_init(&tkd->seq, &tkd->lock); 1758 tkd->timekeeper.id = tkd->shadow_timekeeper.id = tk_id; 1759 tkd->timekeeper.clock_valid = tkd->shadow_timekeeper.clock_valid = valid; 1760 } 1761 1762 /* 1763 * Flag reflecting whether timekeeping_resume() has injected sleeptime. 1764 * 1765 * The flag starts of false and is only set when a suspend reaches 1766 * timekeeping_suspend(), timekeeping_resume() sets it to false when the 1767 * timekeeper clocksource is not stopping across suspend and has been 1768 * used to update sleep time. If the timekeeper clocksource has stopped 1769 * then the flag stays true and is used by the RTC resume code to decide 1770 * whether sleeptime must be injected and if so the flag gets false then. 1771 * 1772 * If a suspend fails before reaching timekeeping_resume() then the flag 1773 * stays false and prevents erroneous sleeptime injection. 
1774 */ 1775 static bool suspend_timing_needed; 1776 1777 /* Flag for if there is a persistent clock on this platform */ 1778 static bool persistent_clock_exists; 1779 1780 /* 1781 * timekeeping_init - Initializes the clocksource and common timekeeping values 1782 */ 1783 void __init timekeeping_init(void) 1784 { 1785 struct timespec64 wall_time, boot_offset, wall_to_mono; 1786 struct timekeeper *tks = &tk_core.shadow_timekeeper; 1787 struct clocksource *clock; 1788 1789 tkd_basic_setup(&tk_core, TIMEKEEPER_CORE, true); 1790 tk_aux_setup(); 1791 1792 read_persistent_wall_and_boot_offset(&wall_time, &boot_offset); 1793 if (timespec64_valid_settod(&wall_time) && 1794 timespec64_to_ns(&wall_time) > 0) { 1795 persistent_clock_exists = true; 1796 } else if (timespec64_to_ns(&wall_time) != 0) { 1797 pr_warn("Persistent clock returned invalid value"); 1798 wall_time = (struct timespec64){0}; 1799 } 1800 1801 if (timespec64_compare(&wall_time, &boot_offset) < 0) 1802 boot_offset = (struct timespec64){0}; 1803 1804 /* 1805 * We want set wall_to_mono, so the following is true: 1806 * wall time + wall_to_mono = boot time 1807 */ 1808 wall_to_mono = timespec64_sub(boot_offset, wall_time); 1809 1810 guard(raw_spinlock_irqsave)(&tk_core.lock); 1811 1812 ntp_init(); 1813 1814 clock = clocksource_default_clock(); 1815 if (clock->enable) 1816 clock->enable(clock); 1817 tk_setup_internals(tks, clock); 1818 1819 tk_set_xtime(tks, &wall_time); 1820 tks->raw_sec = 0; 1821 1822 tk_set_wall_to_mono(tks, wall_to_mono); 1823 1824 timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET); 1825 } 1826 1827 /* time in seconds when suspend began for persistent clock */ 1828 static struct timespec64 timekeeping_suspend_time; 1829 1830 /** 1831 * __timekeeping_inject_sleeptime - Internal function to add sleep interval 1832 * @tk: Pointer to the timekeeper to be updated 1833 * @delta: Pointer to the delta value in timespec64 format 1834 * 1835 * Takes a timespec offset measuring a suspend 
interval and properly 1836 * adds the sleep offset to the timekeeping variables. 1837 */ 1838 static void __timekeeping_inject_sleeptime(struct timekeeper *tk, 1839 const struct timespec64 *delta) 1840 { 1841 if (!timespec64_valid_strict(delta)) { 1842 printk_deferred(KERN_WARNING 1843 "__timekeeping_inject_sleeptime: Invalid " 1844 "sleep delta value!\n"); 1845 return; 1846 } 1847 tk_xtime_add(tk, delta); 1848 tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta)); 1849 tk_update_sleep_time(tk, timespec64_to_ktime(*delta)); 1850 tk_debug_account_sleep_time(delta); 1851 } 1852 1853 #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) 1854 /* 1855 * We have three kinds of time sources to use for sleep time 1856 * injection, the preference order is: 1857 * 1) non-stop clocksource 1858 * 2) persistent clock (ie: RTC accessible when irqs are off) 1859 * 3) RTC 1860 * 1861 * 1) and 2) are used by timekeeping, 3) by RTC subsystem. 1862 * If system has neither 1) nor 2), 3) will be used finally. 1863 * 1864 * 1865 * If timekeeping has injected sleeptime via either 1) or 2), 1866 * 3) becomes needless, so in this case we don't need to call 1867 * rtc_resume(), and this is what timekeeping_rtc_skipresume() 1868 * means. 1869 */ 1870 bool timekeeping_rtc_skipresume(void) 1871 { 1872 return !suspend_timing_needed; 1873 } 1874 1875 /* 1876 * 1) can be determined whether to use or not only when doing 1877 * timekeeping_resume() which is invoked after rtc_suspend(), 1878 * so we can't skip rtc_suspend() surely if system has 1). 1879 * 1880 * But if system has 2), 2) will definitely be used, so in this 1881 * case we don't need to call rtc_suspend(), and this is what 1882 * timekeeping_rtc_skipsuspend() means. 
1883 */ 1884 bool timekeeping_rtc_skipsuspend(void) 1885 { 1886 return persistent_clock_exists; 1887 } 1888 1889 /** 1890 * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values 1891 * @delta: pointer to a timespec64 delta value 1892 * 1893 * This hook is for architectures that cannot support read_persistent_clock64 1894 * because their RTC/persistent clock is only accessible when irqs are enabled. 1895 * and also don't have an effective nonstop clocksource. 1896 * 1897 * This function should only be called by rtc_resume(), and allows 1898 * a suspend offset to be injected into the timekeeping values. 1899 */ 1900 void timekeeping_inject_sleeptime64(const struct timespec64 *delta) 1901 { 1902 scoped_guard(raw_spinlock_irqsave, &tk_core.lock) { 1903 struct timekeeper *tks = &tk_core.shadow_timekeeper; 1904 1905 suspend_timing_needed = false; 1906 timekeeping_forward_now(tks); 1907 __timekeeping_inject_sleeptime(tks, delta); 1908 timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL); 1909 } 1910 1911 /* Signal hrtimers about time change */ 1912 clock_was_set(CLOCK_SET_WALL | CLOCK_SET_BOOT); 1913 } 1914 #endif 1915 1916 /** 1917 * timekeeping_resume - Resumes the generic timekeeping subsystem. 1918 */ 1919 void timekeeping_resume(void) 1920 { 1921 struct timekeeper *tks = &tk_core.shadow_timekeeper; 1922 struct clocksource *clock = tks->tkr_mono.clock; 1923 struct timespec64 ts_new, ts_delta; 1924 bool inject_sleeptime = false; 1925 u64 cycle_now, nsec; 1926 unsigned long flags; 1927 1928 read_persistent_clock64(&ts_new); 1929 1930 clockevents_resume(); 1931 clocksource_resume(); 1932 1933 raw_spin_lock_irqsave(&tk_core.lock, flags); 1934 1935 /* 1936 * After system resumes, we need to calculate the suspended time and 1937 * compensate it for the OS time. There are 3 sources that could be 1938 * used: Nonstop clocksource during suspend, persistent clock and rtc 1939 * device. 
1940 * 1941 * One specific platform may have 1 or 2 or all of them, and the 1942 * preference will be: 1943 * suspend-nonstop clocksource -> persistent clock -> rtc 1944 * The less preferred source will only be tried if there is no better 1945 * usable source. The rtc part is handled separately in rtc core code. 1946 */ 1947 cycle_now = tk_clock_read(&tks->tkr_mono); 1948 nsec = clocksource_stop_suspend_timing(clock, cycle_now); 1949 if (nsec > 0) { 1950 ts_delta = ns_to_timespec64(nsec); 1951 inject_sleeptime = true; 1952 } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) { 1953 ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time); 1954 inject_sleeptime = true; 1955 } 1956 1957 if (inject_sleeptime) { 1958 suspend_timing_needed = false; 1959 __timekeeping_inject_sleeptime(tks, &ts_delta); 1960 } 1961 1962 /* Re-base the last cycle value */ 1963 tks->tkr_mono.cycle_last = cycle_now; 1964 tks->tkr_raw.cycle_last = cycle_now; 1965 1966 tks->ntp_error = 0; 1967 timekeeping_suspended = 0; 1968 timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET); 1969 raw_spin_unlock_irqrestore(&tk_core.lock, flags); 1970 1971 touch_softlockup_watchdog(); 1972 1973 /* Resume the clockevent device(s) and hrtimers */ 1974 tick_resume(); 1975 /* Notify timerfd as resume is equivalent to clock_was_set() */ 1976 timerfd_resume(); 1977 } 1978 1979 int timekeeping_suspend(void) 1980 { 1981 struct timekeeper *tks = &tk_core.shadow_timekeeper; 1982 struct timespec64 delta, delta_delta; 1983 static struct timespec64 old_delta; 1984 struct clocksource *curr_clock; 1985 unsigned long flags; 1986 u64 cycle_now; 1987 1988 read_persistent_clock64(&timekeeping_suspend_time); 1989 1990 /* 1991 * On some systems the persistent_clock can not be detected at 1992 * timekeeping_init by its return value, so if we see a valid 1993 * value returned, update the persistent_clock_exists flag. 
1994 */ 1995 if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec) 1996 persistent_clock_exists = true; 1997 1998 suspend_timing_needed = true; 1999 2000 raw_spin_lock_irqsave(&tk_core.lock, flags); 2001 timekeeping_forward_now(tks); 2002 timekeeping_suspended = 1; 2003 2004 /* 2005 * Since we've called forward_now, cycle_last stores the value 2006 * just read from the current clocksource. Save this to potentially 2007 * use in suspend timing. 2008 */ 2009 curr_clock = tks->tkr_mono.clock; 2010 cycle_now = tks->tkr_mono.cycle_last; 2011 clocksource_start_suspend_timing(curr_clock, cycle_now); 2012 2013 if (persistent_clock_exists) { 2014 /* 2015 * To avoid drift caused by repeated suspend/resumes, 2016 * which each can add ~1 second drift error, 2017 * try to compensate so the difference in system time 2018 * and persistent_clock time stays close to constant. 2019 */ 2020 delta = timespec64_sub(tk_xtime(tks), timekeeping_suspend_time); 2021 delta_delta = timespec64_sub(delta, old_delta); 2022 if (abs(delta_delta.tv_sec) >= 2) { 2023 /* 2024 * if delta_delta is too large, assume time correction 2025 * has occurred and set old_delta to the current delta. 
2026 */ 2027 old_delta = delta; 2028 } else { 2029 /* Otherwise try to adjust old_system to compensate */ 2030 timekeeping_suspend_time = 2031 timespec64_add(timekeeping_suspend_time, delta_delta); 2032 } 2033 } 2034 2035 timekeeping_update_from_shadow(&tk_core, 0); 2036 halt_fast_timekeeper(tks); 2037 raw_spin_unlock_irqrestore(&tk_core.lock, flags); 2038 2039 tick_suspend(); 2040 clocksource_suspend(); 2041 clockevents_suspend(); 2042 2043 return 0; 2044 } 2045 2046 /* sysfs resume/suspend bits for timekeeping */ 2047 static struct syscore_ops timekeeping_syscore_ops = { 2048 .resume = timekeeping_resume, 2049 .suspend = timekeeping_suspend, 2050 }; 2051 2052 static int __init timekeeping_init_ops(void) 2053 { 2054 register_syscore_ops(&timekeeping_syscore_ops); 2055 return 0; 2056 } 2057 device_initcall(timekeeping_init_ops); 2058 2059 /* 2060 * Apply a multiplier adjustment to the timekeeper 2061 */ 2062 static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, 2063 s64 offset, 2064 s32 mult_adj) 2065 { 2066 s64 interval = tk->cycle_interval; 2067 2068 if (mult_adj == 0) { 2069 return; 2070 } else if (mult_adj == -1) { 2071 interval = -interval; 2072 offset = -offset; 2073 } else if (mult_adj != 1) { 2074 interval *= mult_adj; 2075 offset *= mult_adj; 2076 } 2077 2078 /* 2079 * So the following can be confusing. 2080 * 2081 * To keep things simple, lets assume mult_adj == 1 for now. 2082 * 2083 * When mult_adj != 1, remember that the interval and offset values 2084 * have been appropriately scaled so the math is the same. 2085 * 2086 * The basic idea here is that we're increasing the multiplier 2087 * by one, this causes the xtime_interval to be incremented by 2088 * one cycle_interval. 
This is because: 2089 * xtime_interval = cycle_interval * mult 2090 * So if mult is being incremented by one: 2091 * xtime_interval = cycle_interval * (mult + 1) 2092 * Its the same as: 2093 * xtime_interval = (cycle_interval * mult) + cycle_interval 2094 * Which can be shortened to: 2095 * xtime_interval += cycle_interval 2096 * 2097 * So offset stores the non-accumulated cycles. Thus the current 2098 * time (in shifted nanoseconds) is: 2099 * now = (offset * adj) + xtime_nsec 2100 * Now, even though we're adjusting the clock frequency, we have 2101 * to keep time consistent. In other words, we can't jump back 2102 * in time, and we also want to avoid jumping forward in time. 2103 * 2104 * So given the same offset value, we need the time to be the same 2105 * both before and after the freq adjustment. 2106 * now = (offset * adj_1) + xtime_nsec_1 2107 * now = (offset * adj_2) + xtime_nsec_2 2108 * So: 2109 * (offset * adj_1) + xtime_nsec_1 = 2110 * (offset * adj_2) + xtime_nsec_2 2111 * And we know: 2112 * adj_2 = adj_1 + 1 2113 * So: 2114 * (offset * adj_1) + xtime_nsec_1 = 2115 * (offset * (adj_1+1)) + xtime_nsec_2 2116 * (offset * adj_1) + xtime_nsec_1 = 2117 * (offset * adj_1) + offset + xtime_nsec_2 2118 * Canceling the sides: 2119 * xtime_nsec_1 = offset + xtime_nsec_2 2120 * Which gives us: 2121 * xtime_nsec_2 = xtime_nsec_1 - offset 2122 * Which simplifies to: 2123 * xtime_nsec -= offset 2124 */ 2125 if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) { 2126 /* NTP adjustment caused clocksource mult overflow */ 2127 WARN_ON_ONCE(1); 2128 return; 2129 } 2130 2131 tk->tkr_mono.mult += mult_adj; 2132 tk->xtime_interval += interval; 2133 tk->tkr_mono.xtime_nsec -= offset; 2134 } 2135 2136 /* 2137 * Adjust the timekeeper's multiplier to the correct frequency 2138 * and also to reduce the accumulated error value. 
2139 */ 2140 static void timekeeping_adjust(struct timekeeper *tk, s64 offset) 2141 { 2142 u64 ntp_tl = ntp_tick_length(tk->id); 2143 u32 mult; 2144 2145 /* 2146 * Determine the multiplier from the current NTP tick length. 2147 * Avoid expensive division when the tick length doesn't change. 2148 */ 2149 if (likely(tk->ntp_tick == ntp_tl)) { 2150 mult = tk->tkr_mono.mult - tk->ntp_err_mult; 2151 } else { 2152 tk->ntp_tick = ntp_tl; 2153 mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) - 2154 tk->xtime_remainder, tk->cycle_interval); 2155 } 2156 2157 /* 2158 * If the clock is behind the NTP time, increase the multiplier by 1 2159 * to catch up with it. If it's ahead and there was a remainder in the 2160 * tick division, the clock will slow down. Otherwise it will stay 2161 * ahead until the tick length changes to a non-divisible value. 2162 */ 2163 tk->ntp_err_mult = tk->ntp_error > 0 ? 1 : 0; 2164 mult += tk->ntp_err_mult; 2165 2166 timekeeping_apply_adjustment(tk, offset, mult - tk->tkr_mono.mult); 2167 2168 if (unlikely(tk->tkr_mono.clock->maxadj && 2169 (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult) 2170 > tk->tkr_mono.clock->maxadj))) { 2171 printk_once(KERN_WARNING 2172 "Adjusting %s more than 11%% (%ld vs %ld)\n", 2173 tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult, 2174 (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj); 2175 } 2176 2177 /* 2178 * It may be possible that when we entered this function, xtime_nsec 2179 * was very small. Further, if we're slightly speeding the clocksource 2180 * in the code above, its possible the required corrective factor to 2181 * xtime_nsec could cause it to underflow. 2182 * 2183 * Now, since we have already accumulated the second and the NTP 2184 * subsystem has been notified via second_overflow(), we need to skip 2185 * the next update. 
 */
	if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
		/*
		 * Re-borrow a full second from xtime_sec so xtime_nsec is
		 * non-negative again, and remember to skip the matching
		 * second_overflow() notification (it already ran for this
		 * second).
		 */
		tk->tkr_mono.xtime_nsec += (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
		tk->xtime_sec--;
		tk->skip_second_overflow = 1;
	}
}

/*
 * accumulate_nsecs_to_secs - Accumulates nsecs into secs
 *
 * Helper function that accumulates the nsecs greater than a second
 * from the xtime_nsec field to the xtime_secs field.
 * It also calls into the NTP code to handle leapsecond processing.
 */
static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
{
	/* One second in shifted nanoseconds, matching xtime_nsec's scale */
	u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
	unsigned int clock_set = 0;

	while (tk->tkr_mono.xtime_nsec >= nsecps) {
		int leap;

		tk->tkr_mono.xtime_nsec -= nsecps;
		tk->xtime_sec++;

		/*
		 * Skip NTP update if this second was accumulated before,
		 * i.e. xtime_nsec underflowed in timekeeping_adjust()
		 */
		if (unlikely(tk->skip_second_overflow)) {
			tk->skip_second_overflow = 0;
			continue;
		}

		/* Figure out if it's a leap sec and apply if needed */
		leap = second_overflow(tk->id, tk->xtime_sec);
		if (unlikely(leap)) {
			struct timespec64 ts;

			tk->xtime_sec += leap;

			/*
			 * Wall time jumped by the leap second: shift
			 * wall_to_monotonic the other way and adjust the
			 * TAI offset to keep CLOCK_MONOTONIC/CLOCK_TAI
			 * continuous.
			 */
			ts.tv_sec = leap;
			ts.tv_nsec = 0;
			tk_set_wall_to_mono(tk,
				timespec64_sub(tk->wall_to_monotonic, ts));

			__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);

			clock_set = TK_CLOCK_WAS_SET;
		}
	}
	/* Non-zero (TK_CLOCK_WAS_SET) when a leap second was applied */
	return clock_set;
}

/*
 * logarithmic_accumulation - shifted accumulation of cycles
 *
 * This function accumulates a shifted interval of cycles into
 * a shifted interval nanoseconds. Allows for O(log) accumulation
 * loop.
 *
 * Returns the unconsumed cycles.
 */
static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
				    u32 shift, unsigned int *clock_set)
{
	/* Accumulate 2^shift base intervals in one step */
	u64 interval = tk->cycle_interval << shift;
	u64 snsec_per_sec;

	/* If the offset is smaller than a shifted interval, do nothing */
	if (offset < interval)
		return offset;

	/* Accumulate one shifted interval */
	offset -= interval;
	tk->tkr_mono.cycle_last += interval;
	tk->tkr_raw.cycle_last  += interval;

	tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
	*clock_set |= accumulate_nsecs_to_secs(tk);

	/* Accumulate raw time */
	tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
	snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
	while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
		tk->tkr_raw.xtime_nsec -= snsec_per_sec;
		tk->raw_sec++;
	}

	/* Accumulate error between NTP and clock interval */
	tk->ntp_error += tk->ntp_tick << shift;
	tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
						(tk->ntp_error_shift + shift);

	/* Return the cycles which were not consumed by this step */
	return offset;
}

/*
 * timekeeping_advance - Updates the timekeeper to the current time and
 * current NTP tick length
 */
static bool __timekeeping_advance(struct tk_data *tkd, enum timekeeping_adv_mode mode)
{
	/* All updates happen on the shadow copy, published at the end */
	struct timekeeper *tk = &tkd->shadow_timekeeper;
	struct timekeeper *real_tk = &tkd->timekeeper;
	unsigned int clock_set = 0;
	int shift = 0, maxshift;
	u64 offset, orig_offset;

	/* Make sure we're fully resumed: */
	if (unlikely(timekeeping_suspended))
		return false;

	offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
				   tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
				   tk->tkr_mono.clock->max_raw_delta);
	orig_offset = offset;
	/* Check if there's really nothing to do */
	if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
		return false;

	/*
	 * With NO_HZ we may have to accumulate many cycle_intervals
	 * (think "ticks") worth of time at once. To do this efficiently,
	 * we calculate the largest doubling multiple of cycle_intervals
	 * that is smaller than the offset.  We then accumulate that
	 * chunk in one go, and then try to consume the next smaller
	 * doubled multiple.
	 */
	shift = ilog2(offset) - ilog2(tk->cycle_interval);
	shift = max(0, shift);
	/* Bound shift to one less than what overflows tick_length */
	maxshift = (64 - (ilog2(ntp_tick_length(tk->id)) + 1)) - 1;
	shift = min(shift, maxshift);
	/* Consume progressively smaller power-of-two chunks of the offset */
	while (offset >= tk->cycle_interval) {
		offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
		if (offset < tk->cycle_interval<<shift)
			shift--;
	}

	/* Adjust the multiplier to correct NTP error */
	timekeeping_adjust(tk, offset);

	/*
	 * Finally, make sure that after the rounding
	 * xtime_nsec isn't larger than NSEC_PER_SEC
	 */
	clock_set |= accumulate_nsecs_to_secs(tk);

	/*
	 * To avoid inconsistencies caused by adjtimex TK_ADV_FREQ calls
	 * making small negative adjustments to the base xtime_nsec
	 * value, only update the coarse clocks if we accumulated time
	 */
	if (orig_offset != offset)
		tk_update_coarse_nsecs(tk);

	/* Publish the shadow timekeeper to the readers */
	timekeeping_update_from_shadow(tkd, clock_set);

	/* True when CLOCK_WAS_SET notification is required */
	return !!clock_set;
}

/* Advance the core timekeeper under its lock */
static bool timekeeping_advance(enum timekeeping_adv_mode mode)
{
	guard(raw_spinlock_irqsave)(&tk_core.lock);
	return __timekeeping_advance(&tk_core, mode);
}

/**
 * update_wall_time - Uses the current clocksource to increment the wall time
 *
 * It also updates the enabled auxiliary clock timekeepers
 */
void update_wall_time(void)
{
	if (timekeeping_advance(TK_ADV_TICK))
		clock_was_set_delayed();
	tk_aux_advance();
}

/**
 * getboottime64 - Return the real time
 of system boot.
 * @ts:		pointer to the timespec64 to be set
 *
 * Returns the wall-time of boot in a timespec64.
 *
 * This is based on the wall_to_monotonic offset and the total suspend
 * time. Calls to settimeofday will affect the value returned (which
 * basically means that however wrong your real time clock is at boot time,
 * you get the right time here).
 */
void getboottime64(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	/* boot time = realtime offset minus accumulated suspend time */
	ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);

	*ts = ktime_to_timespec64(t);
}
EXPORT_SYMBOL_GPL(getboottime64);

/**
 * ktime_get_coarse_real_ts64 - Get the coarse-grained CLOCK_REALTIME value
 * @ts:		pointer to the timespec64 to be set
 *
 * Lockless read of the tick-granular realtime value via the seqcount.
 */
void ktime_get_coarse_real_ts64(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&tk_core.seq);

		*ts = tk_xtime_coarse(tk);
	} while (read_seqcount_retry(&tk_core.seq, seq));
}
EXPORT_SYMBOL(ktime_get_coarse_real_ts64);

/**
 * ktime_get_coarse_real_ts64_mg - return latter of coarse grained time or floor
 * @ts: timespec64 to be filled
 *
 * Fetch the global mg_floor value, convert it to realtime and compare it
 * to the current coarse-grained time. Fill @ts with whichever is
 * latest. Note that this is a filesystem-specific interface and should be
 * avoided outside of that context.
2409 */ 2410 void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts) 2411 { 2412 struct timekeeper *tk = &tk_core.timekeeper; 2413 u64 floor = atomic64_read(&mg_floor); 2414 ktime_t f_real, offset, coarse; 2415 unsigned int seq; 2416 2417 do { 2418 seq = read_seqcount_begin(&tk_core.seq); 2419 *ts = tk_xtime_coarse(tk); 2420 offset = tk_core.timekeeper.offs_real; 2421 } while (read_seqcount_retry(&tk_core.seq, seq)); 2422 2423 coarse = timespec64_to_ktime(*ts); 2424 f_real = ktime_add(floor, offset); 2425 if (ktime_after(f_real, coarse)) 2426 *ts = ktime_to_timespec64(f_real); 2427 } 2428 2429 /** 2430 * ktime_get_real_ts64_mg - attempt to update floor value and return result 2431 * @ts: pointer to the timespec to be set 2432 * 2433 * Get a monotonic fine-grained time value and attempt to swap it into 2434 * mg_floor. If that succeeds then accept the new floor value. If it fails 2435 * then another task raced in during the interim time and updated the 2436 * floor. Since any update to the floor must be later than the previous 2437 * floor, either outcome is acceptable. 2438 * 2439 * Typically this will be called after calling ktime_get_coarse_real_ts64_mg(), 2440 * and determining that the resulting coarse-grained timestamp did not effect 2441 * a change in ctime. Any more recent floor value would effect a change to 2442 * ctime, so there is no need to retry the atomic64_try_cmpxchg() on failure. 2443 * 2444 * @ts will be filled with the latest floor value, regardless of the outcome of 2445 * the cmpxchg. Note that this is a filesystem specific interface and should be 2446 * avoided outside of that context. 
2447 */ 2448 void ktime_get_real_ts64_mg(struct timespec64 *ts) 2449 { 2450 struct timekeeper *tk = &tk_core.timekeeper; 2451 ktime_t old = atomic64_read(&mg_floor); 2452 ktime_t offset, mono; 2453 unsigned int seq; 2454 u64 nsecs; 2455 2456 do { 2457 seq = read_seqcount_begin(&tk_core.seq); 2458 2459 ts->tv_sec = tk->xtime_sec; 2460 mono = tk->tkr_mono.base; 2461 nsecs = timekeeping_get_ns(&tk->tkr_mono); 2462 offset = tk_core.timekeeper.offs_real; 2463 } while (read_seqcount_retry(&tk_core.seq, seq)); 2464 2465 mono = ktime_add_ns(mono, nsecs); 2466 2467 /* 2468 * Attempt to update the floor with the new time value. As any 2469 * update must be later then the existing floor, and would effect 2470 * a change to ctime from the perspective of the current task, 2471 * accept the resulting floor value regardless of the outcome of 2472 * the swap. 2473 */ 2474 if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) { 2475 ts->tv_nsec = 0; 2476 timespec64_add_ns(ts, nsecs); 2477 timekeeping_inc_mg_floor_swaps(); 2478 } else { 2479 /* 2480 * Another task changed mg_floor since "old" was fetched. 2481 * "old" has been updated with the latest value of "mg_floor". 2482 * That value is newer than the previous floor value, which 2483 * is enough to effect a change to ctime. Accept it. 
2484 */ 2485 *ts = ktime_to_timespec64(ktime_add(old, offset)); 2486 } 2487 } 2488 2489 void ktime_get_coarse_ts64(struct timespec64 *ts) 2490 { 2491 struct timekeeper *tk = &tk_core.timekeeper; 2492 struct timespec64 now, mono; 2493 unsigned int seq; 2494 2495 do { 2496 seq = read_seqcount_begin(&tk_core.seq); 2497 2498 now = tk_xtime_coarse(tk); 2499 mono = tk->wall_to_monotonic; 2500 } while (read_seqcount_retry(&tk_core.seq, seq)); 2501 2502 set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec, 2503 now.tv_nsec + mono.tv_nsec); 2504 } 2505 EXPORT_SYMBOL(ktime_get_coarse_ts64); 2506 2507 /* 2508 * Must hold jiffies_lock 2509 */ 2510 void do_timer(unsigned long ticks) 2511 { 2512 jiffies_64 += ticks; 2513 calc_global_load(); 2514 } 2515 2516 /** 2517 * ktime_get_update_offsets_now - hrtimer helper 2518 * @cwsseq: pointer to check and store the clock was set sequence number 2519 * @offs_real: pointer to storage for monotonic -> realtime offset 2520 * @offs_boot: pointer to storage for monotonic -> boottime offset 2521 * @offs_tai: pointer to storage for monotonic -> clock tai offset 2522 * 2523 * Returns current monotonic time and updates the offsets if the 2524 * sequence number in @cwsseq and timekeeper.clock_was_set_seq are 2525 * different. 
2526 * 2527 * Called from hrtimer_interrupt() or retrigger_next_event() 2528 */ 2529 ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, 2530 ktime_t *offs_boot, ktime_t *offs_tai) 2531 { 2532 struct timekeeper *tk = &tk_core.timekeeper; 2533 unsigned int seq; 2534 ktime_t base; 2535 u64 nsecs; 2536 2537 do { 2538 seq = read_seqcount_begin(&tk_core.seq); 2539 2540 base = tk->tkr_mono.base; 2541 nsecs = timekeeping_get_ns(&tk->tkr_mono); 2542 base = ktime_add_ns(base, nsecs); 2543 2544 if (*cwsseq != tk->clock_was_set_seq) { 2545 *cwsseq = tk->clock_was_set_seq; 2546 *offs_real = tk->offs_real; 2547 *offs_boot = tk->offs_boot; 2548 *offs_tai = tk->offs_tai; 2549 } 2550 2551 /* Handle leapsecond insertion adjustments */ 2552 if (unlikely(base >= tk->next_leap_ktime)) 2553 *offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0)); 2554 2555 } while (read_seqcount_retry(&tk_core.seq, seq)); 2556 2557 return base; 2558 } 2559 2560 /* 2561 * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex 2562 */ 2563 static int timekeeping_validate_timex(const struct __kernel_timex *txc, bool aux_clock) 2564 { 2565 if (txc->modes & ADJ_ADJTIME) { 2566 /* singleshot must not be used with any other mode bits */ 2567 if (!(txc->modes & ADJ_OFFSET_SINGLESHOT)) 2568 return -EINVAL; 2569 if (!(txc->modes & ADJ_OFFSET_READONLY) && 2570 !capable(CAP_SYS_TIME)) 2571 return -EPERM; 2572 } else { 2573 /* In order to modify anything, you gotta be super-user! */ 2574 if (txc->modes && !capable(CAP_SYS_TIME)) 2575 return -EPERM; 2576 /* 2577 * if the quartz is off by more than 10% then 2578 * something is VERY wrong! 2579 */ 2580 if (txc->modes & ADJ_TICK && 2581 (txc->tick < 900000/USER_HZ || 2582 txc->tick > 1100000/USER_HZ)) 2583 return -EINVAL; 2584 } 2585 2586 if (txc->modes & ADJ_SETOFFSET) { 2587 /* In order to inject time, you gotta be super-user! 
*/ 2588 if (!capable(CAP_SYS_TIME)) 2589 return -EPERM; 2590 2591 /* 2592 * Validate if a timespec/timeval used to inject a time 2593 * offset is valid. Offsets can be positive or negative, so 2594 * we don't check tv_sec. The value of the timeval/timespec 2595 * is the sum of its fields,but *NOTE*: 2596 * The field tv_usec/tv_nsec must always be non-negative and 2597 * we can't have more nanoseconds/microseconds than a second. 2598 */ 2599 if (txc->time.tv_usec < 0) 2600 return -EINVAL; 2601 2602 if (txc->modes & ADJ_NANO) { 2603 if (txc->time.tv_usec >= NSEC_PER_SEC) 2604 return -EINVAL; 2605 } else { 2606 if (txc->time.tv_usec >= USEC_PER_SEC) 2607 return -EINVAL; 2608 } 2609 } 2610 2611 /* 2612 * Check for potential multiplication overflows that can 2613 * only happen on 64-bit systems: 2614 */ 2615 if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) { 2616 if (LLONG_MIN / PPM_SCALE > txc->freq) 2617 return -EINVAL; 2618 if (LLONG_MAX / PPM_SCALE < txc->freq) 2619 return -EINVAL; 2620 } 2621 2622 if (aux_clock) { 2623 /* Auxiliary clocks are similar to TAI and do not have leap seconds */ 2624 if (txc->status & (STA_INS | STA_DEL)) 2625 return -EINVAL; 2626 2627 /* No TAI offset setting */ 2628 if (txc->modes & ADJ_TAI) 2629 return -EINVAL; 2630 2631 /* No PPS support either */ 2632 if (txc->status & (STA_PPSFREQ | STA_PPSTIME)) 2633 return -EINVAL; 2634 } 2635 2636 return 0; 2637 } 2638 2639 /** 2640 * random_get_entropy_fallback - Returns the raw clock source value, 2641 * used by random.c for platforms with no valid random_get_entropy(). 
2642 */ 2643 unsigned long random_get_entropy_fallback(void) 2644 { 2645 struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono; 2646 struct clocksource *clock = READ_ONCE(tkr->clock); 2647 2648 if (unlikely(timekeeping_suspended || !clock)) 2649 return 0; 2650 return clock->read(clock); 2651 } 2652 EXPORT_SYMBOL_GPL(random_get_entropy_fallback); 2653 2654 struct adjtimex_result { 2655 struct audit_ntp_data ad; 2656 struct timespec64 delta; 2657 bool clock_set; 2658 }; 2659 2660 static int __do_adjtimex(struct tk_data *tkd, struct __kernel_timex *txc, 2661 struct adjtimex_result *result) 2662 { 2663 struct timekeeper *tks = &tkd->shadow_timekeeper; 2664 bool aux_clock = !timekeeper_is_core_tk(tks); 2665 struct timespec64 ts; 2666 s32 orig_tai, tai; 2667 int ret; 2668 2669 /* Validate the data before disabling interrupts */ 2670 ret = timekeeping_validate_timex(txc, aux_clock); 2671 if (ret) 2672 return ret; 2673 add_device_randomness(txc, sizeof(*txc)); 2674 2675 if (!aux_clock) 2676 ktime_get_real_ts64(&ts); 2677 else 2678 tk_get_aux_ts64(tkd->timekeeper.id, &ts); 2679 2680 add_device_randomness(&ts, sizeof(ts)); 2681 2682 guard(raw_spinlock_irqsave)(&tkd->lock); 2683 2684 if (!tks->clock_valid) 2685 return -ENODEV; 2686 2687 if (txc->modes & ADJ_SETOFFSET) { 2688 result->delta.tv_sec = txc->time.tv_sec; 2689 result->delta.tv_nsec = txc->time.tv_usec; 2690 if (!(txc->modes & ADJ_NANO)) 2691 result->delta.tv_nsec *= 1000; 2692 ret = __timekeeping_inject_offset(tkd, &result->delta); 2693 if (ret) 2694 return ret; 2695 result->clock_set = true; 2696 } 2697 2698 orig_tai = tai = tks->tai_offset; 2699 ret = ntp_adjtimex(tks->id, txc, &ts, &tai, &result->ad); 2700 2701 if (tai != orig_tai) { 2702 __timekeeping_set_tai_offset(tks, tai); 2703 timekeeping_update_from_shadow(tkd, TK_CLOCK_WAS_SET); 2704 result->clock_set = true; 2705 } else { 2706 tk_update_leap_state_all(&tk_core); 2707 } 2708 2709 /* Update the multiplier immediately if frequency was set directly */ 2710 
if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK)) 2711 result->clock_set |= __timekeeping_advance(tkd, TK_ADV_FREQ); 2712 2713 return ret; 2714 } 2715 2716 /** 2717 * do_adjtimex() - Accessor function to NTP __do_adjtimex function 2718 * @txc: Pointer to kernel_timex structure containing NTP parameters 2719 */ 2720 int do_adjtimex(struct __kernel_timex *txc) 2721 { 2722 struct adjtimex_result result = { }; 2723 int ret; 2724 2725 ret = __do_adjtimex(&tk_core, txc, &result); 2726 if (ret < 0) 2727 return ret; 2728 2729 if (txc->modes & ADJ_SETOFFSET) 2730 audit_tk_injoffset(result.delta); 2731 2732 audit_ntp_log(&result.ad); 2733 2734 if (result.clock_set) 2735 clock_was_set(CLOCK_SET_WALL); 2736 2737 ntp_notify_cmos_timer(result.delta.tv_sec != 0); 2738 2739 return ret; 2740 } 2741 2742 /* 2743 * Invoked from NTP with the time keeper lock held, so lockless access is 2744 * fine. 2745 */ 2746 long ktime_get_ntp_seconds(unsigned int id) 2747 { 2748 return timekeeper_data[id].timekeeper.xtime_sec; 2749 } 2750 2751 #ifdef CONFIG_NTP_PPS 2752 /** 2753 * hardpps() - Accessor function to NTP __hardpps function 2754 * @phase_ts: Pointer to timespec64 structure representing phase timestamp 2755 * @raw_ts: Pointer to timespec64 structure representing raw timestamp 2756 */ 2757 void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts) 2758 { 2759 guard(raw_spinlock_irqsave)(&tk_core.lock); 2760 __hardpps(phase_ts, raw_ts); 2761 } 2762 EXPORT_SYMBOL(hardpps); 2763 #endif /* CONFIG_NTP_PPS */ 2764 2765 #ifdef CONFIG_POSIX_AUX_CLOCKS 2766 #include "posix-timers.h" 2767 2768 /* 2769 * Bitmap for the activated auxiliary timekeepers to allow lockless quick 2770 * checks in the hot paths without touching extra cache lines. If set, then 2771 * the state of the corresponding timekeeper has to be re-checked under 2772 * timekeeper::lock. 
2773 */ 2774 static unsigned long aux_timekeepers; 2775 2776 static inline unsigned int clockid_to_tkid(unsigned int id) 2777 { 2778 return TIMEKEEPER_AUX_FIRST + id - CLOCK_AUX; 2779 } 2780 2781 static inline struct tk_data *aux_get_tk_data(clockid_t id) 2782 { 2783 if (!clockid_aux_valid(id)) 2784 return NULL; 2785 return &timekeeper_data[clockid_to_tkid(id)]; 2786 } 2787 2788 /* Invoked from timekeeping after a clocksource change */ 2789 static void tk_aux_update_clocksource(void) 2790 { 2791 unsigned long active = READ_ONCE(aux_timekeepers); 2792 unsigned int id; 2793 2794 for_each_set_bit(id, &active, BITS_PER_LONG) { 2795 struct tk_data *tkd = &timekeeper_data[id + TIMEKEEPER_AUX_FIRST]; 2796 struct timekeeper *tks = &tkd->shadow_timekeeper; 2797 2798 guard(raw_spinlock_irqsave)(&tkd->lock); 2799 if (!tks->clock_valid) 2800 continue; 2801 2802 timekeeping_forward_now(tks); 2803 tk_setup_internals(tks, tk_core.timekeeper.tkr_mono.clock); 2804 timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL); 2805 } 2806 } 2807 2808 static void tk_aux_advance(void) 2809 { 2810 unsigned long active = READ_ONCE(aux_timekeepers); 2811 unsigned int id; 2812 2813 /* Lockless quick check to avoid extra cache lines */ 2814 for_each_set_bit(id, &active, BITS_PER_LONG) { 2815 struct tk_data *aux_tkd = &timekeeper_data[id + TIMEKEEPER_AUX_FIRST]; 2816 2817 guard(raw_spinlock)(&aux_tkd->lock); 2818 if (aux_tkd->shadow_timekeeper.clock_valid) 2819 __timekeeping_advance(aux_tkd, TK_ADV_TICK); 2820 } 2821 } 2822 2823 /** 2824 * ktime_get_aux - Get time for a AUX clock 2825 * @id: ID of the clock to read (CLOCK_AUX...) 
2826 * @kt: Pointer to ktime_t to store the time stamp 2827 * 2828 * Returns: True if the timestamp is valid, false otherwise 2829 */ 2830 bool ktime_get_aux(clockid_t id, ktime_t *kt) 2831 { 2832 struct tk_data *aux_tkd = aux_get_tk_data(id); 2833 struct timekeeper *aux_tk; 2834 unsigned int seq; 2835 ktime_t base; 2836 u64 nsecs; 2837 2838 WARN_ON(timekeeping_suspended); 2839 2840 if (!aux_tkd) 2841 return false; 2842 2843 aux_tk = &aux_tkd->timekeeper; 2844 do { 2845 seq = read_seqcount_begin(&aux_tkd->seq); 2846 if (!aux_tk->clock_valid) 2847 return false; 2848 2849 base = ktime_add(aux_tk->tkr_mono.base, aux_tk->offs_aux); 2850 nsecs = timekeeping_get_ns(&aux_tk->tkr_mono); 2851 } while (read_seqcount_retry(&aux_tkd->seq, seq)); 2852 2853 *kt = ktime_add_ns(base, nsecs); 2854 return true; 2855 } 2856 EXPORT_SYMBOL_GPL(ktime_get_aux); 2857 2858 /** 2859 * ktime_get_aux_ts64 - Get time for a AUX clock 2860 * @id: ID of the clock to read (CLOCK_AUX...) 2861 * @ts: Pointer to timespec64 to store the time stamp 2862 * 2863 * Returns: True if the timestamp is valid, false otherwise 2864 */ 2865 bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *ts) 2866 { 2867 ktime_t now; 2868 2869 if (!ktime_get_aux(id, &now)) 2870 return false; 2871 *ts = ktime_to_timespec64(now); 2872 return true; 2873 } 2874 EXPORT_SYMBOL_GPL(ktime_get_aux_ts64); 2875 2876 static int aux_get_res(clockid_t id, struct timespec64 *tp) 2877 { 2878 if (!clockid_aux_valid(id)) 2879 return -ENODEV; 2880 2881 tp->tv_sec = aux_clock_resolution_ns() / NSEC_PER_SEC; 2882 tp->tv_nsec = aux_clock_resolution_ns() % NSEC_PER_SEC; 2883 return 0; 2884 } 2885 2886 static int aux_get_timespec(clockid_t id, struct timespec64 *tp) 2887 { 2888 return ktime_get_aux_ts64(id, tp) ? 
0 : -ENODEV; 2889 } 2890 2891 static int aux_clock_set(const clockid_t id, const struct timespec64 *tnew) 2892 { 2893 struct tk_data *aux_tkd = aux_get_tk_data(id); 2894 struct timekeeper *aux_tks; 2895 ktime_t tnow, nsecs; 2896 2897 if (!timespec64_valid_settod(tnew)) 2898 return -EINVAL; 2899 if (!aux_tkd) 2900 return -ENODEV; 2901 2902 aux_tks = &aux_tkd->shadow_timekeeper; 2903 2904 guard(raw_spinlock_irq)(&aux_tkd->lock); 2905 if (!aux_tks->clock_valid) 2906 return -ENODEV; 2907 2908 /* Forward the timekeeper base time */ 2909 timekeeping_forward_now(aux_tks); 2910 /* 2911 * Get the updated base time. tkr_mono.base has not been 2912 * updated yet, so do that first. That makes the update 2913 * in timekeeping_update_from_shadow() redundant, but 2914 * that's harmless. After that @tnow can be calculated 2915 * by using tkr_mono::cycle_last, which has been set 2916 * by timekeeping_forward_now(). 2917 */ 2918 tk_update_ktime_data(aux_tks); 2919 nsecs = timekeeping_cycles_to_ns(&aux_tks->tkr_mono, aux_tks->tkr_mono.cycle_last); 2920 tnow = ktime_add(aux_tks->tkr_mono.base, nsecs); 2921 2922 /* 2923 * Calculate the new AUX offset as delta to @tnow ("monotonic"). 2924 * That avoids all the tk::xtime back and forth conversions as 2925 * xtime ("realtime") is not applicable for auxiliary clocks and 2926 * kept in sync with "monotonic". 2927 */ 2928 aux_tks->offs_aux = ktime_sub(timespec64_to_ktime(*tnew), tnow); 2929 2930 timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); 2931 return 0; 2932 } 2933 2934 static int aux_clock_adj(const clockid_t id, struct __kernel_timex *txc) 2935 { 2936 struct tk_data *aux_tkd = aux_get_tk_data(id); 2937 struct adjtimex_result result = { }; 2938 2939 if (!aux_tkd) 2940 return -ENODEV; 2941 2942 /* 2943 * @result is ignored for now as there are neither hrtimers nor a 2944 * RTC related to auxiliary clocks for now. 
2945 */ 2946 return __do_adjtimex(aux_tkd, txc, &result); 2947 } 2948 2949 const struct k_clock clock_aux = { 2950 .clock_getres = aux_get_res, 2951 .clock_get_timespec = aux_get_timespec, 2952 .clock_set = aux_clock_set, 2953 .clock_adj = aux_clock_adj, 2954 }; 2955 2956 static void aux_clock_enable(clockid_t id) 2957 { 2958 struct tk_read_base *tkr_raw = &tk_core.timekeeper.tkr_raw; 2959 struct tk_data *aux_tkd = aux_get_tk_data(id); 2960 struct timekeeper *aux_tks = &aux_tkd->shadow_timekeeper; 2961 2962 /* Prevent the core timekeeper from changing. */ 2963 guard(raw_spinlock_irq)(&tk_core.lock); 2964 2965 /* 2966 * Setup the auxiliary clock assuming that the raw core timekeeper 2967 * clock frequency conversion is close enough. Userspace has to 2968 * adjust for the deviation via clock_adjtime(2). 2969 */ 2970 guard(raw_spinlock_nested)(&aux_tkd->lock); 2971 2972 /* Remove leftovers of a previous registration */ 2973 memset(aux_tks, 0, sizeof(*aux_tks)); 2974 /* Restore the timekeeper id */ 2975 aux_tks->id = aux_tkd->timekeeper.id; 2976 /* Setup the timekeeper based on the current system clocksource */ 2977 tk_setup_internals(aux_tks, tkr_raw->clock); 2978 2979 /* Mark it valid and set it live */ 2980 aux_tks->clock_valid = true; 2981 timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); 2982 } 2983 2984 static void aux_clock_disable(clockid_t id) 2985 { 2986 struct tk_data *aux_tkd = aux_get_tk_data(id); 2987 2988 guard(raw_spinlock_irq)(&aux_tkd->lock); 2989 aux_tkd->shadow_timekeeper.clock_valid = false; 2990 timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); 2991 } 2992 2993 static DEFINE_MUTEX(aux_clock_mutex); 2994 2995 static ssize_t aux_clock_enable_store(struct kobject *kobj, struct kobj_attribute *attr, 2996 const char *buf, size_t count) 2997 { 2998 /* Lazy atoi() as name is "0..7" */ 2999 int id = kobj->name[0] & 0x7; 3000 bool enable; 3001 3002 if (!capable(CAP_SYS_TIME)) 3003 return -EPERM; 3004 3005 if (kstrtobool(buf, &enable) < 0) 3006 
return -EINVAL; 3007 3008 guard(mutex)(&aux_clock_mutex); 3009 if (enable == test_bit(id, &aux_timekeepers)) 3010 return count; 3011 3012 if (enable) { 3013 aux_clock_enable(CLOCK_AUX + id); 3014 set_bit(id, &aux_timekeepers); 3015 } else { 3016 aux_clock_disable(CLOCK_AUX + id); 3017 clear_bit(id, &aux_timekeepers); 3018 } 3019 return count; 3020 } 3021 3022 static ssize_t aux_clock_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) 3023 { 3024 unsigned long active = READ_ONCE(aux_timekeepers); 3025 /* Lazy atoi() as name is "0..7" */ 3026 int id = kobj->name[0] & 0x7; 3027 3028 return sysfs_emit(buf, "%d\n", test_bit(id, &active)); 3029 } 3030 3031 static struct kobj_attribute aux_clock_enable_attr = __ATTR_RW(aux_clock_enable); 3032 3033 static struct attribute *aux_clock_enable_attrs[] = { 3034 &aux_clock_enable_attr.attr, 3035 NULL 3036 }; 3037 3038 static const struct attribute_group aux_clock_enable_attr_group = { 3039 .attrs = aux_clock_enable_attrs, 3040 }; 3041 3042 static int __init tk_aux_sysfs_init(void) 3043 { 3044 struct kobject *auxo, *tko = kobject_create_and_add("time", kernel_kobj); 3045 3046 if (!tko) 3047 return -ENOMEM; 3048 3049 auxo = kobject_create_and_add("aux_clocks", tko); 3050 if (!auxo) { 3051 kobject_put(tko); 3052 return -ENOMEM; 3053 } 3054 3055 for (int i = 0; i <= MAX_AUX_CLOCKS; i++) { 3056 char id[2] = { [0] = '0' + i, }; 3057 struct kobject *clk = kobject_create_and_add(id, auxo); 3058 3059 if (!clk) 3060 return -ENOMEM; 3061 3062 int ret = sysfs_create_group(clk, &aux_clock_enable_attr_group); 3063 3064 if (ret) 3065 return ret; 3066 } 3067 return 0; 3068 } 3069 late_initcall(tk_aux_sysfs_init); 3070 3071 static __init void tk_aux_setup(void) 3072 { 3073 for (int i = TIMEKEEPER_AUX_FIRST; i <= TIMEKEEPER_AUX_LAST; i++) 3074 tkd_basic_setup(&timekeeper_data[i], i, false); 3075 } 3076 #endif /* CONFIG_POSIX_AUX_CLOCKS */ 3077