// SPDX-License-Identifier: GPL-2.0
/*
 *  Kernel timekeeping code and accessor functions. Based on code from
 *  timer.c, moved in commit 8524070b7982.
 */
#include <linux/timekeeper_internal.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/nmi.h>
#include <linux/sched.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/clock.h>
#include <linux/syscore_ops.h>
#include <linux/clocksource.h>
#include <linux/jiffies.h>
#include <linux/time.h>
#include <linux/timex.h>
#include <linux/tick.h>
#include <linux/stop_machine.h>
#include <linux/pvclock_gtod.h>
#include <linux/compiler.h>
#include <linux/audit.h>
#include <linux/random.h>

#include "tick-internal.h"
#include "ntp_internal.h"
#include "timekeeping_internal.h"

#define TK_CLEAR_NTP		(1 << 0)
#define TK_CLOCK_WAS_SET	(1 << 1)

#define TK_UPDATE_ALL		(TK_CLEAR_NTP | TK_CLOCK_WAS_SET)

enum timekeeping_adv_mode {
	/* Update timekeeper when a tick has passed */
	TK_ADV_TICK,

	/* Update timekeeper on a direct frequency change */
	TK_ADV_FREQ
};

/*
 * The most important data for readout fits into a single 64 byte
 * cache line.
 */
struct tk_data {
	seqcount_raw_spinlock_t	seq;
	struct timekeeper	timekeeper;
	struct timekeeper	shadow_timekeeper;
	raw_spinlock_t		lock;
} ____cacheline_aligned;

static struct tk_data timekeeper_data[TIMEKEEPERS_MAX];

/* The core timekeeper */
#define tk_core	(timekeeper_data[TIMEKEEPER_CORE])

#ifdef CONFIG_POSIX_AUX_CLOCKS
static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts)
{
	return ktime_get_aux_ts64(CLOCK_AUX + tkid - TIMEKEEPER_AUX_FIRST, ts);
}
#else
static inline bool tk_get_aux_ts64(unsigned int tkid, struct timespec64 *ts)
{
	return false;
}
#endif

/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;

/**
 * struct tk_fast - NMI safe timekeeper
 * @seq:	Sequence counter for protecting updates. The lowest bit
 *		is the index for the tk_read_base array
 * @base:	tk_read_base array. Access is indexed by the lowest bit of
 *		@seq.
 *
 * See @update_fast_timekeeper() below.
 */
struct tk_fast {
	seqcount_latch_t	seq;
	struct tk_read_base	base[2];
};

/* Suspend-time cycles value for halted fast timekeeper. */
static u64 cycles_at_suspend;

static u64 dummy_clock_read(struct clocksource *cs)
{
	if (timekeeping_suspended)
		return cycles_at_suspend;
	return local_clock();
}

static struct clocksource dummy_clock = {
	.read = dummy_clock_read,
};

/*
 * Boot time initialization which allows local_clock() to be utilized
 * during early boot when clocksources are not available. local_clock()
 * returns nanoseconds already so no conversion is required, hence mult=1
 * and shift=0. When the first proper clocksource is installed then
 * the fast time keepers are updated with the correct values.
 */
#define FAST_TK_INIT						\
	{							\
		.clock		= &dummy_clock,			\
		.mask		= CLOCKSOURCE_MASK(64),		\
		.mult		= 1,				\
		.shift		= 0,				\
	}

static struct tk_fast tk_fast_mono ____cacheline_aligned = {
	.seq     = SEQCNT_LATCH_ZERO(tk_fast_mono.seq),
	.base[0] = FAST_TK_INIT,
	.base[1] = FAST_TK_INIT,
};

static struct tk_fast tk_fast_raw ____cacheline_aligned = {
	.seq     = SEQCNT_LATCH_ZERO(tk_fast_raw.seq),
	.base[0] = FAST_TK_INIT,
	.base[1] = FAST_TK_INIT,
};

#ifdef CONFIG_POSIX_AUX_CLOCKS
static __init void tk_aux_setup(void);
static void tk_aux_update_clocksource(void);
static void tk_aux_advance(void);
#else
static inline void tk_aux_setup(void) { }
static inline void tk_aux_update_clocksource(void) { }
static inline void tk_aux_advance(void) { }
#endif

unsigned long timekeeper_lock_irqsave(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&tk_core.lock, flags);
	return flags;
}

void timekeeper_unlock_irqrestore(unsigned long flags)
{
	raw_spin_unlock_irqrestore(&tk_core.lock, flags);
}

/*
 * Multigrain timestamps require tracking the latest fine-grained timestamp
 * that has been issued, and never returning a coarse-grained timestamp that is
 * earlier than that value.
 *
 * mg_floor represents the latest fine-grained time that has been handed out as
 * a file timestamp on the system. This is tracked as a monotonic ktime_t, and
 * converted to a realtime clock value on an as-needed basis.
 *
 * Maintaining mg_floor ensures the multigrain interfaces never issue a
 * timestamp earlier than one that has been previously issued.
 *
 * The exception to this rule is when there is a backward realtime clock jump. If
 * such an event occurs, a timestamp can appear to be earlier than a previous one.
 */
static __cacheline_aligned_in_smp atomic64_t mg_floor;

static inline void tk_normalize_xtime(struct timekeeper *tk)
{
	while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
		tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
		tk->xtime_sec++;
	}
	while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) {
		tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
		tk->raw_sec++;
	}
}

static inline struct timespec64 tk_xtime(const struct timekeeper *tk)
{
	struct timespec64 ts;

	ts.tv_sec = tk->xtime_sec;
	ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
	return ts;
}

static inline struct timespec64 tk_xtime_coarse(const struct timekeeper *tk)
{
	struct timespec64 ts;

	ts.tv_sec = tk->xtime_sec;
	ts.tv_nsec = tk->coarse_nsec;
	return ts;
}

/*
 * Update the nanoseconds part for the coarse time keepers. They can't rely
 * on xtime_nsec because xtime_nsec could be adjusted by a small negative
 * amount when the multiplication factor of the clock is adjusted, which
 * could cause the coarse clocks to go slightly backwards. See
 * timekeeping_apply_adjustment(). Thus we keep a separate copy for the coarse
 * clockids which is only updated when the clock has been set or we have
 * accumulated time.
 */
static inline void tk_update_coarse_nsecs(struct timekeeper *tk)
{
	tk->coarse_nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
}

static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
{
	tk->xtime_sec = ts->tv_sec;
	tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
	tk_update_coarse_nsecs(tk);
}

static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
{
	tk->xtime_sec += ts->tv_sec;
	tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
	tk_normalize_xtime(tk);
	tk_update_coarse_nsecs(tk);
}

static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
{
	struct timespec64 tmp;

	/*
	 * Verify consistency of: offset_real = -wall_to_monotonic
	 * before modifying anything
	 */
	set_normalized_timespec64(&tmp, -tk->wall_to_monotonic.tv_sec,
					-tk->wall_to_monotonic.tv_nsec);
	WARN_ON_ONCE(tk->offs_real != timespec64_to_ktime(tmp));
	tk->wall_to_monotonic = wtm;
	set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
	/* Paired with READ_ONCE() in ktime_mono_to_any() */
	WRITE_ONCE(tk->offs_real, timespec64_to_ktime(tmp));
	WRITE_ONCE(tk->offs_tai, ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0)));
}

static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
{
	/* Paired with READ_ONCE() in ktime_mono_to_any() */
	WRITE_ONCE(tk->offs_boot, ktime_add(tk->offs_boot, delta));
	/*
	 * Timespec representation for VDSO update to avoid 64bit division
	 * on every update.
	 */
	tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
}

/*
 * tk_clock_read - atomic clocksource read() helper
 *
 * This helper is necessary to use in the read paths because, while the
 * seqcount ensures we don't return a bad value while structures are updated,
 * it doesn't protect from potential crashes. There is the possibility that
 * the tkr's clocksource may change between the read reference, and the
 * clock reference passed to the read function. This can cause crashes if
 * the wrong clocksource is passed to the wrong read function.
 * This isn't necessary to use when holding the tk_core.lock or doing
 * a read of the fast-timekeeper tkrs (which is protected by its own locking
 * and update logic).
 */
static inline u64 tk_clock_read(const struct tk_read_base *tkr)
{
	struct clocksource *clock = READ_ONCE(tkr->clock);

	return clock->read(clock);
}
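
/*
 * Illustrative sketch (not part of the original file): the timekeeper stores
 * nanoseconds left-shifted by the clocksource shift so that sub-nanosecond
 * remainders of the mult/shift arithmetic are not lost. Assuming a
 * hypothetical clocksource with shift = 8:
 *
 *	tk->tkr_mono.xtime_nsec = (u64)500 << 8;	// 500 ns in shifted form
 *	ts.tv_nsec = tk->tkr_mono.xtime_nsec >> 8;	// tk_xtime() reads back 500
 *
 * tk_normalize_xtime() above therefore compares against NSEC_PER_SEC << shift,
 * which is the value of one full second in this representation.
 */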

/**
 * tk_setup_internals - Set up internals to use clocksource clock.
 *
 * @tk:		The target timekeeper to setup.
 * @clock:	Pointer to clocksource.
 *
 * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
 * pair and interval request.
 *
 * Unless you're the timekeeping code, you should not be using this!
 */
static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
{
	u64 interval;
	u64 tmp, ntpinterval;
	struct clocksource *old_clock;

	++tk->cs_was_changed_seq;
	old_clock = tk->tkr_mono.clock;
	tk->tkr_mono.clock = clock;
	tk->tkr_mono.mask = clock->mask;
	tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);

	tk->tkr_raw.clock = clock;
	tk->tkr_raw.mask = clock->mask;
	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;

	/* Do the ns -> cycle conversion first, using original mult */
	tmp = NTP_INTERVAL_LENGTH;
	tmp <<= clock->shift;
	ntpinterval = tmp;
	tmp += clock->mult/2;
	do_div(tmp, clock->mult);
	if (tmp == 0)
		tmp = 1;

	interval = (u64) tmp;
	tk->cycle_interval = interval;

	/* Go back from cycles -> shifted ns */
	tk->xtime_interval = interval * clock->mult;
	tk->xtime_remainder = ntpinterval - tk->xtime_interval;
	tk->raw_interval = interval * clock->mult;

	/* if changing clocks, convert xtime_nsec shift units */
	if (old_clock) {
		int shift_change = clock->shift - old_clock->shift;

		if (shift_change < 0) {
			tk->tkr_mono.xtime_nsec >>= -shift_change;
			tk->tkr_raw.xtime_nsec >>= -shift_change;
		} else {
			tk->tkr_mono.xtime_nsec <<= shift_change;
			tk->tkr_raw.xtime_nsec <<= shift_change;
		}
	}

	tk->tkr_mono.shift = clock->shift;
	tk->tkr_raw.shift = clock->shift;

	tk->ntp_error = 0;
	tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
	tk->ntp_tick = ntpinterval << tk->ntp_error_shift;

	/*
	 * The timekeeper keeps its own mult values for the currently
	 * active clocksource. These values will be adjusted via NTP
	 * to counteract clock drifting.
	 */
	tk->tkr_mono.mult = clock->mult;
	tk->tkr_raw.mult = clock->mult;
	tk->ntp_err_mult = 0;
	tk->skip_second_overflow = 0;
}

/* Timekeeper helper functions. */
static noinline u64 delta_to_ns_safe(const struct tk_read_base *tkr, u64 delta)
{
	return mul_u64_u32_add_u64_shr(delta, tkr->mult, tkr->xtime_nsec, tkr->shift);
}

static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
{
	/* Calculate the delta since the last update_wall_time() */
	u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask;

	/*
	 * This detects both negative motion and the case where the delta
	 * overflows the multiplication with tkr->mult.
	 */
	if (unlikely(delta > tkr->clock->max_cycles)) {
		/*
		 * Handle clocksource inconsistency between CPUs to prevent
		 * time from going backwards by checking for the MSB of the
		 * mask being set in the delta.
		 */
		if (delta & ~(mask >> 1))
			return tkr->xtime_nsec >> tkr->shift;

		return delta_to_ns_safe(tkr, delta);
	}

	return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift;
}

static __always_inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
{
	return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr));
}
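
/*
 * Illustrative sketch (not part of the original file): for a hypothetical
 * 1 GHz clocksource calibrated with shift = 8 the mult value is 256, i.e.
 * one cycle corresponds to mult / 2^shift = 1 ns. A delta of 1000 cycles
 * with xtime_nsec = 128 (0.5 ns in shifted form) then yields
 *
 *	ns = ((1000 * 256) + 128) >> 8 = 1000
 *
 * The slow path via delta_to_ns_safe() computes the same value with a wider
 * intermediate product for the case where delta * mult would overflow 64 bit.
 */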

/**
 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
 * @tkr: Timekeeping readout base from which we take the update
 * @tkf: Pointer to NMI safe timekeeper
 *
 * We want to use this from any context including NMI and tracing /
 * instrumenting the timekeeping code itself.
 *
 * Employ the latch technique; see @write_seqcount_latch.
 *
 * So if an NMI hits the update of base[0] then it will use base[1]
 * which is still consistent. In the worst case this can result in a
 * slightly wrong timestamp (a few nanoseconds). See
 * @ktime_get_mono_fast_ns.
 */
static void update_fast_timekeeper(const struct tk_read_base *tkr,
				   struct tk_fast *tkf)
{
	struct tk_read_base *base = tkf->base;

	/* Force readers off to base[1] */
	write_seqcount_latch_begin(&tkf->seq);

	/* Update base[0] */
	memcpy(base, tkr, sizeof(*base));

	/* Force readers back to base[0] */
	write_seqcount_latch(&tkf->seq);

	/* Update base[1] */
	memcpy(base + 1, base, sizeof(*base));

	write_seqcount_latch_end(&tkf->seq);
}

static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
{
	struct tk_read_base *tkr;
	unsigned int seq;
	u64 now;

	do {
		seq = read_seqcount_latch(&tkf->seq);
		tkr = tkf->base + (seq & 0x01);
		now = ktime_to_ns(tkr->base);
		now += timekeeping_get_ns(tkr);
	} while (read_seqcount_latch_retry(&tkf->seq, seq));

	return now;
}

/**
 * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
 *
 * This timestamp is not guaranteed to be monotonic across an update.
 * The timestamp is calculated by:
 *
 *	now = base_mono + clock_delta * slope
 *
 * So if the update lowers the slope, readers who are forced to the
 * not yet updated second array are still using the old steeper slope.
 *
 * tmono
 * ^
 * |    o  n
 * |   o n
 * |  u
 * | o
 * |o
 * |12345678---> reader order
 *
 * o = old slope
 * u = update
 * n = new slope
 *
 * So reader 6 will observe time going backwards versus reader 5.
 *
 * While other CPUs are likely to be able to observe that, the only way
 * for a CPU local observation is when an NMI hits in the middle of
 * the update. Timestamps taken from that NMI context might be ahead
 * of the following timestamps. Callers need to be aware of that and
 * deal with it.
 */
u64 notrace ktime_get_mono_fast_ns(void)
{
	return __ktime_get_fast_ns(&tk_fast_mono);
}
EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);

/**
 * ktime_get_raw_fast_ns - Fast NMI safe access to clock monotonic raw
 *
 * Contrary to ktime_get_mono_fast_ns() this is always correct because the
 * conversion factor is not affected by NTP/PTP correction.
 */
u64 notrace ktime_get_raw_fast_ns(void)
{
	return __ktime_get_fast_ns(&tk_fast_raw);
}
EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
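
/*
 * Usage sketch (illustrative only, not part of the original file): code that
 * may run in NMI context, e.g. a hypothetical tracer hook, must use the fast
 * accessors instead of ktime_get():
 *
 *	void my_nmi_hook(void)			// hypothetical
 *	{
 *		u64 now = ktime_get_mono_fast_ns();
 *
 *		trace_printk("nmi at %llu ns\n", now);
 *	}
 *
 * As documented above, two timestamps taken across a concurrent update may
 * be out of order by a few nanoseconds; callers have to tolerate that.
 */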

/**
 * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
 *
 * To keep it NMI safe since we're accessing from tracing, we're not using a
 * separate timekeeper with updates to monotonic clock and boot offset
 * protected with seqcounts. This has the following minor side effects:
 *
 * (1) It's possible that a timestamp is taken after the boot offset is updated
 * but before the timekeeper is updated. If this happens, the new boot offset
 * is added to the old timekeeping making the clock appear to update slightly
 * earlier:
 *    CPU 0                                        CPU 1
 *    timekeeping_inject_sleeptime64()
 *    __timekeeping_inject_sleeptime(tk, delta);
 *                                                 timestamp();
 *    timekeeping_update_staged(tkd, TK_CLEAR_NTP...);
 *
 * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
 * partially updated. Since the tk->offs_boot update is a rare event, this
 * should be a rare occurrence which postprocessing should be able to handle.
 *
 * The caveats vs. timestamp ordering as documented for ktime_get_mono_fast_ns()
 * apply as well.
 */
u64 notrace ktime_get_boot_fast_ns(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot)));
}
EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);

/**
 * ktime_get_tai_fast_ns - NMI safe and fast access to tai clock.
 *
 * The same limitations as described for ktime_get_boot_fast_ns() apply. The
 * mono time and the TAI offset are not read atomically, which may yield wrong
 * readouts. However, an update of the TAI offset is a rare event, e.g. caused
 * by settime or adjtimex with an offset. The user of this function has to deal
 * with the possibility of wrong timestamps in post processing.
 */
u64 notrace ktime_get_tai_fast_ns(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai)));
}
EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns);

/**
 * ktime_get_real_fast_ns - NMI safe and fast access to clock realtime.
 *
 * See ktime_get_mono_fast_ns() for documentation of the time stamp ordering.
 */
u64 ktime_get_real_fast_ns(void)
{
	struct tk_fast *tkf = &tk_fast_mono;
	struct tk_read_base *tkr;
	u64 baser, delta;
	unsigned int seq;

	do {
		seq = raw_read_seqcount_latch(&tkf->seq);
		tkr = tkf->base + (seq & 0x01);
		baser = ktime_to_ns(tkr->base_real);
		delta = timekeeping_get_ns(tkr);
	} while (raw_read_seqcount_latch_retry(&tkf->seq, seq));

	return baser + delta;
}
EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);
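
/*
 * Usage sketch (illustrative only, not part of the original file): the
 * offset based fast accessors are handy in tracing context when boot or TAI
 * timestamps are wanted without taking the timekeeper sequence count:
 *
 *	u64 boot_ns = ktime_get_boot_fast_ns();
 *	u64 tai_ns  = ktime_get_tai_fast_ns();
 *
 * Both add a racily read offset to the monotonic fast clock, so a reader
 * racing with a sleep time injection or a TAI offset update can observe a
 * stale offset once. As documented above, post processing has to cope with
 * that.
 */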

/**
 * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
 * @tk: Timekeeper to snapshot.
 *
 * It generally is unsafe to access the clocksource after timekeeping has been
 * suspended, so take a snapshot of the readout base of @tk and use it as the
 * fast timekeeper's readout base while suspended. It will return the same
 * number of cycles every time until timekeeping is resumed at which time the
 * proper readout base for the fast timekeeper will be restored automatically.
 */
static void halt_fast_timekeeper(const struct timekeeper *tk)
{
	static struct tk_read_base tkr_dummy;
	const struct tk_read_base *tkr = &tk->tkr_mono;

	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
	cycles_at_suspend = tk_clock_read(tkr);
	tkr_dummy.clock = &dummy_clock;
	tkr_dummy.base_real = tkr->base + tk->offs_real;
	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);

	tkr = &tk->tkr_raw;
	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
	tkr_dummy.clock = &dummy_clock;
	update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
}

static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);

static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
{
	raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk);
}

/**
 * pvclock_gtod_register_notifier - register a pvclock timedata update listener
 * @nb: Pointer to the notifier block to register
 */
int pvclock_gtod_register_notifier(struct notifier_block *nb)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	int ret;

	guard(raw_spinlock_irqsave)(&tk_core.lock);
	ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
	update_pvclock_gtod(tk, true);

	return ret;
}
EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);

/**
 * pvclock_gtod_unregister_notifier - unregister a pvclock
 * timedata update listener
 * @nb: Pointer to the notifier block to unregister
 */
int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
{
	guard(raw_spinlock_irqsave)(&tk_core.lock);
	return raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
}
EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);

/*
 * tk_update_leap_state - helper to update the next_leap_ktime
 */
static inline void tk_update_leap_state(struct timekeeper *tk)
{
	tk->next_leap_ktime = ntp_get_next_leap(tk->id);
	if (tk->next_leap_ktime != KTIME_MAX)
		/* Convert to monotonic time */
		tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
}

/*
 * Leap state update for both shadow and the real timekeeper
 * Separate to spare a full memcpy() of the timekeeper.
 */
static void tk_update_leap_state_all(struct tk_data *tkd)
{
	write_seqcount_begin(&tkd->seq);
	tk_update_leap_state(&tkd->shadow_timekeeper);
	tkd->timekeeper.next_leap_ktime = tkd->shadow_timekeeper.next_leap_ktime;
	write_seqcount_end(&tkd->seq);
}

/*
 * Update the ktime_t based scalar nsec members of the timekeeper
 */
static inline void tk_update_ktime_data(struct timekeeper *tk)
{
	u64 seconds;
	u32 nsec;

	/*
	 * The xtime based monotonic readout is:
	 *	nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now();
	 * The ktime based monotonic readout is:
	 *	nsec = base_mono + now();
	 * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec
	 */
	seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
	nsec = (u32) tk->wall_to_monotonic.tv_nsec;
	tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);

	/*
	 * The sum of the nanoseconds portions of xtime and
	 * wall_to_monotonic can be greater than or equal to one second. Take
	 * this into account before updating tk->ktime_sec.
	 */
	nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
	if (nsec >= NSEC_PER_SEC)
		seconds++;
	tk->ktime_sec = seconds;

	/* Update the monotonic raw base */
	tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
}

/*
 * Restore the shadow timekeeper from the real timekeeper.
 */
static void timekeeping_restore_shadow(struct tk_data *tkd)
{
	lockdep_assert_held(&tkd->lock);
	memcpy(&tkd->shadow_timekeeper, &tkd->timekeeper, sizeof(tkd->timekeeper));
}

static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int action)
{
	struct timekeeper *tk = &tkd->shadow_timekeeper;

	lockdep_assert_held(&tkd->lock);

	/*
	 * Block out readers before running the updates below because that
	 * updates VDSO and other time related infrastructure. Not blocking
	 * the readers might let a reader see time going backwards when
	 * reading from the VDSO after the VDSO update and then reading in
	 * the kernel from the timekeeper before that got updated.
	 */
	write_seqcount_begin(&tkd->seq);

	if (action & TK_CLEAR_NTP) {
		tk->ntp_error = 0;
		ntp_clear(tk->id);
	}

	tk_update_leap_state(tk);
	tk_update_ktime_data(tk);
	tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real;

	if (tk->id == TIMEKEEPER_CORE) {
		update_vsyscall(tk);
		update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);

		update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
		update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
	}

	if (action & TK_CLOCK_WAS_SET)
		tk->clock_was_set_seq++;

	/*
	 * Update the real timekeeper.
	 *
	 * We could avoid this memcpy() by switching pointers, but that has
	 * the downside that the reader side would no longer benefit from
	 * the cacheline optimized data layout of the timekeeper and would
	 * require another indirection.
	 */
	memcpy(&tkd->timekeeper, tk, sizeof(*tk));
	write_seqcount_end(&tkd->seq);
}

/**
 * timekeeping_forward_now - update clock to the current time
 * @tk:		Pointer to the timekeeper to update
 *
 * Forward the current clock to update its state since the last call to
 * update_wall_time(). This is useful before significant clock changes,
 * as it avoids having to deal with this time offset explicitly.
 */
static void timekeeping_forward_now(struct timekeeper *tk)
{
	u64 cycle_now, delta;

	cycle_now = tk_clock_read(&tk->tkr_mono);
	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
				  tk->tkr_mono.clock->max_raw_delta);
	tk->tkr_mono.cycle_last = cycle_now;
	tk->tkr_raw.cycle_last = cycle_now;

	while (delta > 0) {
		u64 max = tk->tkr_mono.clock->max_cycles;
		u64 incr = delta < max ? delta : max;

		tk->tkr_mono.xtime_nsec += incr * tk->tkr_mono.mult;
		tk->tkr_raw.xtime_nsec += incr * tk->tkr_raw.mult;
		tk_normalize_xtime(tk);
		delta -= incr;
	}
	tk_update_coarse_nsecs(tk);
}

/**
 * ktime_get_real_ts64 - Returns the time of day in a timespec64.
 * @ts:		pointer to the timespec to be set
 *
 * Returns the time of day in a timespec64 (WARN if suspended).
 */
void ktime_get_real_ts64(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	u64 nsecs;

	WARN_ON(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);

		ts->tv_sec = tk->xtime_sec;
		nsecs = timekeeping_get_ns(&tk->tkr_mono);

	} while (read_seqcount_retry(&tk_core.seq, seq));

	ts->tv_nsec = 0;
	timespec64_add_ns(ts, nsecs);
}
EXPORT_SYMBOL(ktime_get_real_ts64);

ktime_t ktime_get(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	ktime_t base;
	u64 nsecs;

	WARN_ON(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		base = tk->tkr_mono.base;
		nsecs = timekeeping_get_ns(&tk->tkr_mono);

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ktime_add_ns(base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get);

u32 ktime_get_resolution_ns(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	u32 nsecs;

	WARN_ON(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		nsecs = tk->tkr_mono.mult >> tk->tkr_mono.shift;
	} while (read_seqcount_retry(&tk_core.seq, seq));

	return nsecs;
}
EXPORT_SYMBOL_GPL(ktime_get_resolution_ns);

static ktime_t *offsets[TK_OFFS_MAX] = {
	[TK_OFFS_REAL]	= &tk_core.timekeeper.offs_real,
	[TK_OFFS_BOOT]	= &tk_core.timekeeper.offs_boot,
	[TK_OFFS_TAI]	= &tk_core.timekeeper.offs_tai,
};

ktime_t ktime_get_with_offset(enum tk_offsets offs)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	ktime_t base, *offset = offsets[offs];
	u64 nsecs;

	WARN_ON(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		base = ktime_add(tk->tkr_mono.base, *offset);
		nsecs = timekeeping_get_ns(&tk->tkr_mono);

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ktime_add_ns(base, nsecs);

}
EXPORT_SYMBOL_GPL(ktime_get_with_offset);

ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	ktime_t base, *offset = offsets[offs];
	unsigned int seq;
	u64 nsecs;

	WARN_ON(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		base = ktime_add(tk->tkr_mono.base, *offset);
		nsecs = tk->coarse_nsec;

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ktime_add_ns(base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
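
/*
 * Usage sketch (illustrative only, not part of the original file): callers
 * pick the accessor by the clock semantics they need, e.g. a hypothetical
 * driver measuring an elapsed time and logging a wall clock stamp:
 *
 *	struct timespec64 wall;
 *	ktime_t start, delta;
 *
 *	start = ktime_get();			// CLOCK_MONOTONIC, NTP slewed
 *	do_work();				// hypothetical
 *	delta = ktime_sub(ktime_get(), start);	// unaffected by settimeofday()
 *	ktime_get_real_ts64(&wall);		// CLOCK_REALTIME for logging
 *
 * All of these accessors retry their seqcount protected read section when an
 * update of the timekeeper happened concurrently.
 */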

/**
 * ktime_mono_to_any() - convert monotonic time to any other time
 * @tmono:	time to convert.
 * @offs:	which offset to use
 */
ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
{
	ktime_t *offset = offsets[offs];
	unsigned int seq;
	ktime_t tconv;

	if (IS_ENABLED(CONFIG_64BIT)) {
		/*
		 * Paired with WRITE_ONCE()s in tk_set_wall_to_mono() and
		 * tk_update_sleep_time().
		 */
		return ktime_add(tmono, READ_ONCE(*offset));
	}

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		tconv = ktime_add(tmono, *offset);
	} while (read_seqcount_retry(&tk_core.seq, seq));

	return tconv;
}
EXPORT_SYMBOL_GPL(ktime_mono_to_any);

/**
 * ktime_get_raw - Returns the raw monotonic time in ktime_t format
 */
ktime_t ktime_get_raw(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	ktime_t base;
	u64 nsecs;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		base = tk->tkr_raw.base;
		nsecs = timekeeping_get_ns(&tk->tkr_raw);

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ktime_add_ns(base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get_raw);

/**
 * ktime_get_ts64 - get the monotonic clock in timespec64 format
 * @ts:		pointer to timespec variable
 *
 * The function calculates the monotonic clock from the realtime
 * clock and the wall_to_monotonic offset and stores the result
 * in normalized timespec64 format in the variable pointed to by @ts.
 */
void ktime_get_ts64(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	struct timespec64 tomono;
	unsigned int seq;
	u64 nsec;

	WARN_ON(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		ts->tv_sec = tk->xtime_sec;
		nsec = timekeeping_get_ns(&tk->tkr_mono);
		tomono = tk->wall_to_monotonic;

	} while (read_seqcount_retry(&tk_core.seq, seq));

	ts->tv_sec += tomono.tv_sec;
	ts->tv_nsec = 0;
	timespec64_add_ns(ts, nsec + tomono.tv_nsec);
}
EXPORT_SYMBOL_GPL(ktime_get_ts64);

/**
 * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC
 *
 * Returns the seconds portion of CLOCK_MONOTONIC with a single non
 * serialized read. tk->ktime_sec is of type 'unsigned long' so this
 * works on both 32 and 64 bit systems. On 32 bit systems the readout
 * covers ~136 years of uptime which should be enough to prevent
 * premature wrap arounds.
 */
time64_t ktime_get_seconds(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	WARN_ON(timekeeping_suspended);
	return tk->ktime_sec;
}
EXPORT_SYMBOL_GPL(ktime_get_seconds);

/**
 * ktime_get_real_seconds - Get the seconds portion of CLOCK_REALTIME
 *
 * Returns the wall clock seconds since 1970.
 *
 * For 64bit systems the fast access to tk->xtime_sec is preserved. On
 * 32bit systems the access must be protected with the sequence
 * counter to provide "atomic" access to the 64bit tk->xtime_sec
 * value.
 */
time64_t ktime_get_real_seconds(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	time64_t seconds;
	unsigned int seq;

	if (IS_ENABLED(CONFIG_64BIT))
		return tk->xtime_sec;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		seconds = tk->xtime_sec;

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return seconds;
}
EXPORT_SYMBOL_GPL(ktime_get_real_seconds);
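
/*
 * Usage sketch (illustrative only, not part of the original file): the
 * seconds granular accessors avoid the full nanosecond readout when only
 * coarse information is required, e.g. for a hypothetical uptime / wall
 * clock report:
 *
 *	time64_t up   = ktime_get_seconds();		// CLOCK_MONOTONIC
 *	time64_t wall = ktime_get_real_seconds();	// seconds since 1970
 *
 * ktime_get_real_seconds() is a single read on 64 bit kernels and falls back
 * to the sequence count protected loop on 32 bit.
 */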

/**
 * __ktime_get_real_seconds - Unprotected access to CLOCK_REALTIME seconds
 *
 * The same as ktime_get_real_seconds() but without the sequence counter
 * protection. This function is used in restricted contexts like the x86 MCE
 * handler and in KGDB. On 32-bit it is not protected against a concurrent,
 * half completed update and must only be used in such critical contexts.
 *
 * Returns: Racy snapshot of the CLOCK_REALTIME seconds value
 */
noinstr time64_t __ktime_get_real_seconds(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	return tk->xtime_sec;
}

/**
 * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter
 * @systime_snapshot:	pointer to struct receiving the system time snapshot
 */
void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	ktime_t base_raw;
	ktime_t base_real;
	ktime_t base_boot;
	u64 nsec_raw;
	u64 nsec_real;
	u64 now;

	WARN_ON_ONCE(timekeeping_suspended);

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		now = tk_clock_read(&tk->tkr_mono);
		systime_snapshot->cs_id = tk->tkr_mono.clock->id;
		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
		base_real = ktime_add(tk->tkr_mono.base,
				      tk_core.timekeeper.offs_real);
		base_boot = ktime_add(tk->tkr_mono.base,
				      tk_core.timekeeper.offs_boot);
		base_raw = tk->tkr_raw.base;
		nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
		nsec_raw  = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
	} while (read_seqcount_retry(&tk_core.seq, seq));

	systime_snapshot->cycles = now;
	systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
	systime_snapshot->boot = ktime_add_ns(base_boot, nsec_real);
	systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
}
EXPORT_SYMBOL_GPL(ktime_get_snapshot);
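
/*
 * Usage sketch (illustrative only, not part of the original file): a driver
 * which later wants to correlate a device timestamp with system time can
 * record a snapshot up front and pass it to get_device_system_crosststamp()
 * as history_begin:
 *
 *	struct system_time_snapshot snap;
 *
 *	ktime_get_snapshot(&snap);
 *	// ... device captures its timestamp some time later ...
 *	// snap.cycles / snap.real / snap.raw describe the earlier point
 *
 * The cs_was_changed_seq and clock_was_set_seq members let the interpolation
 * code below detect clocksource changes and clock discontinuities within
 * that history window.
 */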

/* Scale base by mult/div checking for overflow */
static int scale64_check_overflow(u64 mult, u64 div, u64 *base)
{
	u64 tmp, rem;

	tmp = div64_u64_rem(*base, div, &rem);

	if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) ||
	    ((int)sizeof(u64)*8 - fls64(mult) < fls64(rem)))
		return -EOVERFLOW;
	tmp *= mult;

	rem = div64_u64(rem * mult, div);
	*base = tmp + rem;
	return 0;
}

/**
 * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval
 * @history:			Snapshot representing start of history
 * @partial_history_cycles:	Cycle offset into history (fractional part)
 * @total_history_cycles:	Total history length in cycles
 * @discontinuity:		True indicates clock was set on history period
 * @ts:				Cross timestamp that should be adjusted using
 *				partial/total ratio
 *
 * Helper function used by get_device_system_crosststamp() to correct the
 * crosstimestamp corresponding to the start of the current interval to the
 * system counter value (timestamp point) provided by the driver. The
 * total_history_* quantities are the total history starting at the provided
 * reference point and ending at the start of the current interval. The cycle
 * count between the driver timestamp point and the start of the current
 * interval is partial_history_cycles.
 */
static int adjust_historical_crosststamp(struct system_time_snapshot *history,
					 u64 partial_history_cycles,
					 u64 total_history_cycles,
					 bool discontinuity,
					 struct system_device_crosststamp *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	u64 corr_raw, corr_real;
	bool interp_forward;
	int ret;

	if (total_history_cycles == 0 || partial_history_cycles == 0)
		return 0;

	/* Interpolate shortest distance from beginning or end of history */
	interp_forward = partial_history_cycles > total_history_cycles / 2;
	partial_history_cycles = interp_forward ?
		total_history_cycles - partial_history_cycles :
		partial_history_cycles;

	/*
	 * Scale the monotonic raw time delta by:
	 *	partial_history_cycles / total_history_cycles
	 */
	corr_raw = (u64)ktime_to_ns(
		ktime_sub(ts->sys_monoraw, history->raw));
	ret = scale64_check_overflow(partial_history_cycles,
				     total_history_cycles, &corr_raw);
	if (ret)
		return ret;

	/*
	 * If there is a discontinuity in the history, scale monotonic raw
	 *	correction by:
	 *	mult(real)/mult(raw) yielding the realtime correction
	 * Otherwise, calculate the realtime correction similar to monotonic
	 *	raw calculation
	 */
	if (discontinuity) {
		corr_real = mul_u64_u32_div
			(corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult);
	} else {
		corr_real = (u64)ktime_to_ns(
			ktime_sub(ts->sys_realtime, history->real));
		ret = scale64_check_overflow(partial_history_cycles,
					     total_history_cycles, &corr_real);
		if (ret)
			return ret;
	}

	/* Fixup monotonic raw and real time values */
	if (interp_forward) {
		ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw);
		ts->sys_realtime = ktime_add_ns(history->real, corr_real);
	} else {
		ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw);
		ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real);
	}

	return 0;
}

/*
 * timestamp_in_interval - true if ts is chronologically in [start, end]
 *
 * True if ts occurs chronologically at or after start, and before or at end.
 */
static bool timestamp_in_interval(u64 start, u64 end, u64 ts)
{
	if (ts >= start && ts <= end)
		return true;
	if (start > end && (ts >= start || ts <= end))
		return true;
	return false;
}

static bool convert_clock(u64 *val, u32 numerator, u32 denominator)
{
	u64 rem, res;

	if (!numerator || !denominator)
		return false;

	res = div64_u64_rem(*val, denominator, &rem) * numerator;
	*val = res + div_u64(rem * numerator, denominator);
	return true;
}

static bool convert_base_to_cs(struct system_counterval_t *scv)
{
	struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock;
	struct clocksource_base *base;
	u32 num, den;

	/* The timestamp was taken from the time keeper clock source */
	if (cs->id == scv->cs_id)
		return true;

	/*
	 * Check whether cs_id matches the base clock. Prevent the compiler from
	 * re-evaluating @base as the clocksource might change concurrently.
	 */
	base = READ_ONCE(cs->base);
	if (!base || base->id != scv->cs_id)
		return false;

	num = scv->use_nsecs ? cs->freq_khz : base->numerator;
	den = scv->use_nsecs ? USEC_PER_SEC : base->denominator;

	if (!convert_clock(&scv->cycles, num, den))
		return false;

	scv->cycles += base->offset;
	return true;
}

static bool convert_cs_to_base(u64 *cycles, enum clocksource_ids base_id)
{
	struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock;
	struct clocksource_base *base;

	/*
	 * Check whether base_id matches the base clock. Prevent the compiler from
	 * re-evaluating @base as the clocksource might change concurrently.
	 */
	base = READ_ONCE(cs->base);
	if (!base || base->id != base_id)
		return false;

	*cycles -= base->offset;
	if (!convert_clock(cycles, base->denominator, base->numerator))
		return false;
	return true;
}

static bool convert_ns_to_cs(u64 *delta)
{
	struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;

	if (BITS_TO_BYTES(fls64(*delta) + tkr->shift) >= sizeof(*delta))
		return false;

	*delta = div_u64((*delta << tkr->shift) - tkr->xtime_nsec, tkr->mult);
	return true;
}
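
/*
 * Illustrative sketch (not part of the original file): convert_clock() above
 * scales a counter value by numerator/denominator without losing the
 * remainder and without forming the potentially overflowing full product.
 * Assuming a hypothetical ratio of numerator = 125 and denominator = 3
 * (e.g. scaling a 24 MHz base counter up to a 1 GHz clocksource):
 *
 *	val = 7;
 *	res = (7 / 3) * 125 + ((7 % 3) * 125) / 3
 *	    = 2 * 125 + 125 / 3
 *	    = 250 + 41 = 291
 *
 * which matches 7 * 125 / 3 = 291 rounded down.
 */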

/**
 * ktime_real_to_base_clock() - Convert CLOCK_REALTIME timestamp to a base clock timestamp
 * @treal:	CLOCK_REALTIME timestamp to convert
 * @base_id:	base clocksource id
 * @cycles:	pointer to store the converted base clock timestamp
 *
 * Converts a supplied, future realtime clock value to the corresponding base clock value.
 *
 * Return:  true if the conversion is successful, false otherwise.
 */
bool ktime_real_to_base_clock(ktime_t treal, enum clocksource_ids base_id, u64 *cycles)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	u64 delta;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		if ((u64)treal < tk->tkr_mono.base_real)
			return false;
		delta = (u64)treal - tk->tkr_mono.base_real;
		if (!convert_ns_to_cs(&delta))
			return false;
		*cycles = tk->tkr_mono.cycle_last + delta;
		if (!convert_cs_to_base(cycles, base_id))
			return false;
	} while (read_seqcount_retry(&tk_core.seq, seq));

	return true;
}
EXPORT_SYMBOL_GPL(ktime_real_to_base_clock);

/**
 * get_device_system_crosststamp - Synchronously capture system/device timestamp
 * @get_time_fn:	Callback to get simultaneous device time and
 *			system counter from the device driver
 * @ctx:		Context passed to get_time_fn()
 * @history_begin:	Historical reference point used to interpolate system
 *			time when counter provided by the driver is before the current interval
 * @xtstamp:		Receives simultaneously captured system and device time
 *
 * Reads a timestamp from a device and correlates it to system time
 */
int get_device_system_crosststamp(int (*get_time_fn)
				  (ktime_t *device_time,
				   struct system_counterval_t *sys_counterval,
				   void *ctx),
				  void *ctx,
				  struct system_time_snapshot *history_begin,
				  struct system_device_crosststamp *xtstamp)
{
	struct system_counterval_t system_counterval;
	struct timekeeper *tk = &tk_core.timekeeper;
	u64 cycles, now, interval_start;
	unsigned int clock_was_set_seq = 0;
	ktime_t base_real, base_raw;
	u64 nsec_real, nsec_raw;
	u8 cs_was_changed_seq;
	unsigned int seq;
	bool do_interp;
	int ret;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		/*
		 * Try to synchronously capture device time and a system
		 * counter value calling back into the device driver
		 */
		ret = get_time_fn(&xtstamp->device, &system_counterval, ctx);
		if (ret)
			return ret;

		/*
		 * Verify that the clocksource ID associated with the captured
		 * system counter value is the same as for the currently
		 * installed timekeeper clocksource
		 */
		if (system_counterval.cs_id == CSID_GENERIC ||
		    !convert_base_to_cs(&system_counterval))
			return -ENODEV;
		cycles = system_counterval.cycles;

		/*
		 * Check whether the system counter value provided by the
		 * device driver is on the current timekeeping interval.
		 */
		now = tk_clock_read(&tk->tkr_mono);
		interval_start = tk->tkr_mono.cycle_last;
		if (!timestamp_in_interval(interval_start, now, cycles)) {
			clock_was_set_seq = tk->clock_was_set_seq;
			cs_was_changed_seq = tk->cs_was_changed_seq;
			cycles = interval_start;
			do_interp = true;
		} else {
			do_interp = false;
		}

		base_real = ktime_add(tk->tkr_mono.base,
				      tk_core.timekeeper.offs_real);
		base_raw = tk->tkr_raw.base;

		nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles);
		nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, cycles);
	} while (read_seqcount_retry(&tk_core.seq, seq));

	xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real);
	xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw);

	/*
	 * Interpolate if necessary, adjusting back from the start of the
	 * current interval
	 */
	if (do_interp) {
		u64 partial_history_cycles, total_history_cycles;
		bool discontinuity;

		/*
		 * Check that the counter value is not before the provided
		 * history reference and that the history doesn't cross a
		 * clocksource change
		 */
		if (!history_begin ||
		    !timestamp_in_interval(history_begin->cycles,
					   cycles, system_counterval.cycles) ||
		    history_begin->cs_was_changed_seq != cs_was_changed_seq)
			return -EINVAL;
		partial_history_cycles = cycles - system_counterval.cycles;
		total_history_cycles = cycles - history_begin->cycles;
		discontinuity =
			history_begin->clock_was_set_seq != clock_was_set_seq;

		ret = adjust_historical_crosststamp(history_begin,
						    partial_history_cycles,
						    total_history_cycles,
						    discontinuity, xtstamp);
		if (ret)
			return ret;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(get_device_system_crosststamp);

/**
 * timekeeping_clocksource_has_base - Check whether the current clocksource
 *				      is based on a given base clock
 * @id:		base clocksource ID
 *
 * Note:	The return value is a snapshot which can become invalid right
 *		after the function returns.
 *
 * Return:	true if the timekeeper clocksource has a base clock with @id,
 *		false otherwise
 */
bool timekeeping_clocksource_has_base(enum clocksource_ids id)
{
	/*
	 * This is a snapshot, so no point in using the sequence
	 * count. Just prevent the compiler from re-evaluating @base as the
	 * clocksource might change concurrently.
	 */
	struct clocksource_base *base = READ_ONCE(tk_core.timekeeper.tkr_mono.clock->base);

	return base ? base->id == id : false;
}
EXPORT_SYMBOL_GPL(timekeeping_clocksource_has_base);
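
/*
 * Usage sketch (illustrative only, not part of the original file): a
 * hypothetical PTP capable driver provides the callback which captures the
 * device time together with a correlated system counter value and then asks
 * for the matching system timestamps:
 *
 *	static int my_get_time_fn(ktime_t *device_time,
 *				  struct system_counterval_t *system_counter,
 *				  void *ctx)
 *	{
 *		// Read device clock and the correlated counter atomically
 *		*device_time = my_read_device_clock(ctx);	   // hypothetical
 *		system_counter->cycles = my_read_base_counter(ctx); // hypothetical
 *		system_counter->cs_id = CSID_X86_ART;		   // for example
 *		return 0;
 *	}
 *
 *	struct system_device_crosststamp xt;
 *	int err = get_device_system_crosststamp(my_get_time_fn, ctx, NULL, &xt);
 *
 * On success xt.device, xt.sys_realtime and xt.sys_monoraw all describe the
 * same instant.
 */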

/**
 * do_settimeofday64 - Sets the time of day.
 * @ts:     pointer to the timespec64 variable containing the new time
 *
 * Sets the time of day to the new time, updates NTP and notifies hrtimers
 */
int do_settimeofday64(const struct timespec64 *ts)
{
	struct timespec64 ts_delta, xt;

	if (!timespec64_valid_settod(ts))
		return -EINVAL;

	scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
		struct timekeeper *tks = &tk_core.shadow_timekeeper;

		timekeeping_forward_now(tks);

		xt = tk_xtime(tks);
		ts_delta = timespec64_sub(*ts, xt);

		if (timespec64_compare(&tks->wall_to_monotonic, &ts_delta) > 0) {
			timekeeping_restore_shadow(&tk_core);
			return -EINVAL;
		}

		tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, ts_delta));
		tk_set_xtime(tks, ts);
		timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
	}

	/* Signal hrtimers about time change */
	clock_was_set(CLOCK_SET_WALL);

	audit_tk_injoffset(ts_delta);
	add_device_randomness(ts, sizeof(*ts));
	return 0;
}
EXPORT_SYMBOL(do_settimeofday64);

static inline bool timekeeper_is_core_tk(struct timekeeper *tk)
{
	return !IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS) || tk->id == TIMEKEEPER_CORE;
}

/**
 * __timekeeping_inject_offset - Adds or subtracts from the current time.
 * @tkd:	Pointer to the timekeeper to modify
 * @ts:		Pointer to the timespec variable containing the offset
 *
 * Adds or subtracts an offset value from the current time.
 */
static int __timekeeping_inject_offset(struct tk_data *tkd, const struct timespec64 *ts)
{
	struct timekeeper *tks = &tkd->shadow_timekeeper;
	struct timespec64 tmp;

	if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
		return -EINVAL;

	timekeeping_forward_now(tks);

	if (timekeeper_is_core_tk(tks)) {
		/* Make sure the proposed value is valid */
		tmp = timespec64_add(tk_xtime(tks), *ts);
		if (timespec64_compare(&tks->wall_to_monotonic, ts) > 0 ||
		    !timespec64_valid_settod(&tmp)) {
			timekeeping_restore_shadow(tkd);
			return -EINVAL;
		}

		tk_xtime_add(tks, ts);
		tk_set_wall_to_mono(tks, timespec64_sub(tks->wall_to_monotonic, *ts));
	} else {
		struct tk_read_base *tkr_mono = &tks->tkr_mono;
		ktime_t now, offs;

		/* Get the current time */
		now = ktime_add_ns(tkr_mono->base, timekeeping_get_ns(tkr_mono));
		/* Add the relative offset change */
		offs = ktime_add(tks->offs_aux, timespec64_to_ktime(*ts));

		/* Prevent that the resulting time becomes negative */
		if (ktime_add(now, offs) < 0) {
			timekeeping_restore_shadow(tkd);
			return -EINVAL;
		}
		tks->offs_aux = offs;
	}

	timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL);
	return 0;
}

static int timekeeping_inject_offset(const struct timespec64 *ts)
{
	int ret;

	scoped_guard (raw_spinlock_irqsave, &tk_core.lock)
		ret = __timekeeping_inject_offset(&tk_core, ts);

	/* Signal hrtimers about time change */
	if (!ret)
		clock_was_set(CLOCK_SET_WALL);
	return ret;
}

/*
 * Indicates if there is an offset between the system clock and the hardware
 * clock/persistent clock/rtc.
 */
int persistent_clock_is_local;
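
/*
 * Usage sketch (illustrative only, not part of the original file): setting
 * the wall clock from kernel code, e.g. a hypothetical RTC synchronization,
 * goes through do_settimeofday64(), which validates the new value and
 * refuses out of range settings:
 *
 *	struct timespec64 ts = { .tv_sec = rtc_seconds, .tv_nsec = 0 }; // hypothetical value
 *
 *	if (do_settimeofday64(&ts))
 *		pr_warn("invalid time value\n");
 *
 * timekeeping_inject_offset() above is the relative counterpart used by
 * adjtimex(ADJ_SETOFFSET).
 */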

/*
 * Adjust the time obtained from the CMOS to be UTC time instead of
 * local time.
 *
 * This is ugly, but preferable to the alternatives. Otherwise we
 * would either need to write a program to do it in /etc/rc (and risk
 * confusion if the program gets run more than once; it would also be
 * hard to make the program warp the clock precisely n hours) or
 * compile in the timezone information into the kernel. Bad, bad....
 *
 *						- TYT, 1992-01-01
 *
 * The best thing to do is to keep the CMOS clock in universal time (UTC)
 * as real UNIX machines always do it. This avoids all headaches about
 * daylight saving times and warping kernel clocks.
 */
void timekeeping_warp_clock(void)
{
	if (sys_tz.tz_minuteswest != 0) {
		struct timespec64 adjust;

		persistent_clock_is_local = 1;
		adjust.tv_sec = sys_tz.tz_minuteswest * 60;
		adjust.tv_nsec = 0;
		timekeeping_inject_offset(&adjust);
	}
}

/*
 * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic
 */
static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
{
	tk->tai_offset = tai_offset;
	tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0));
}

/*
 * change_clocksource - Swaps clocksources if a new one is available
 *
 * Accumulates current time interval and initializes new clocksource
 */
static int change_clocksource(void *data)
{
	struct clocksource *new = data, *old = NULL;

	/*
	 * If the clocksource is in a module, get a module reference.
	 * Succeeds for built-in code (owner == NULL) as well. Abort if the
	 * reference can't be acquired.
	 */
	if (!try_module_get(new->owner))
		return 0;

	/* Abort if the device can't be enabled */
	if (new->enable && new->enable(new) != 0) {
		module_put(new->owner);
		return 0;
	}

	scoped_guard (raw_spinlock_irqsave, &tk_core.lock) {
		struct timekeeper *tks = &tk_core.shadow_timekeeper;

		timekeeping_forward_now(tks);
		old = tks->tkr_mono.clock;
		tk_setup_internals(tks, new);
		timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
	}

	tk_aux_update_clocksource();

	if (old) {
		if (old->disable)
			old->disable(old);
		module_put(old->owner);
	}

	return 0;
}

/**
 * timekeeping_notify - Install a new clock source
 * @clock:		pointer to the clock source
 *
 * This function is called from clocksource.c after a new, better clock
 * source has been registered. The caller holds the clocksource_mutex.
 */
int timekeeping_notify(struct clocksource *clock)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	if (tk->tkr_mono.clock == clock)
		return 0;
	stop_machine(change_clocksource, clock, NULL);
	tick_clock_notify();
	return tk->tkr_mono.clock == clock ? 0 : -1;
}

/**
 * ktime_get_raw_ts64 - Returns the raw monotonic time in a timespec
 * @ts:		pointer to the timespec64 to be set
 *
 * Returns the raw monotonic time (completely un-modified by ntp)
 */
void ktime_get_raw_ts64(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	u64 nsecs;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		ts->tv_sec = tk->raw_sec;
		nsecs = timekeeping_get_ns(&tk->tkr_raw);

	} while (read_seqcount_retry(&tk_core.seq, seq));

	ts->tv_nsec = 0;
	timespec64_add_ns(ts, nsecs);
}
EXPORT_SYMBOL(ktime_get_raw_ts64);

/**
 * ktime_get_clock_ts64 - Returns time of a clock in a timespec
 * @id:		POSIX clock ID of the clock to read
 * @ts:		Pointer to the timespec64 to be set
 *
 * The timestamp is invalidated (@ts->sec is set to -1) if the
 * clock @id is not available.
 */
void ktime_get_clock_ts64(clockid_t id, struct timespec64 *ts)
{
	/* Invalidate time stamp */
	ts->tv_sec = -1;
	ts->tv_nsec = 0;

	switch (id) {
	case CLOCK_REALTIME:
		ktime_get_real_ts64(ts);
		return;
	case CLOCK_MONOTONIC:
		ktime_get_ts64(ts);
		return;
	case CLOCK_MONOTONIC_RAW:
		ktime_get_raw_ts64(ts);
		return;
	case CLOCK_AUX ... CLOCK_AUX_LAST:
		if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS))
			ktime_get_aux_ts64(id, ts);
		return;
	default:
		WARN_ON_ONCE(1);
	}
}
EXPORT_SYMBOL_GPL(ktime_get_clock_ts64);
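
/*
 * Usage sketch (illustrative only, not part of the original file): callers
 * which handle several clock ids generically can funnel them through
 * ktime_get_clock_ts64() and check for the invalidation marker:
 *
 *	struct timespec64 ts;
 *
 *	ktime_get_clock_ts64(CLOCK_AUX, &ts);
 *	if (ts.tv_sec == -1)
 *		return -ENODEV;		// clock not available
 *
 * For the auxiliary clock range the readout only succeeds when
 * CONFIG_POSIX_AUX_CLOCKS is enabled and the requested clock is active.
 */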

/**
 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
 */
int timekeeping_valid_for_hres(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	int ret;

	do {
		seq = read_seqcount_begin(&tk_core.seq);

		ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ret;
}

/**
 * timekeeping_max_deferment - Returns max time the clocksource can be deferred
 */
u64 timekeeping_max_deferment(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seq;
	u64 ret;

	do {
		seq = read_seqcount_begin(&tk_core.seq);

		ret = tk->tkr_mono.clock->max_idle_ns;

	} while (read_seqcount_retry(&tk_core.seq, seq));

	return ret;
}

/**
 * read_persistent_clock64 -  Return time from the persistent clock.
 * @ts: Pointer to the storage for the readout value
 *
 * Weak dummy function for arches that do not yet support it.
 * Reads the time from the battery backed persistent clock.
 * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported.
 *
 *  XXX - Do be sure to remove it once all arches implement it.
 */
void __weak read_persistent_clock64(struct timespec64 *ts)
{
	ts->tv_sec = 0;
	ts->tv_nsec = 0;
}

/**
 * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset
 *                                        from the boot.
 * @wall_time:	  current time as returned by persistent clock
 * @boot_offset:  offset that is defined as wall_time - boot_time
 *
 * Weak dummy function for arches that do not yet support it.
 *
 * The default function calculates the offset based on the current value of
 * local_clock(). This way architectures that support sched_clock() but don't
 * support a dedicated boot time clock will provide the best estimate of the
 * boot time.
 */
void __weak __init
read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
				     struct timespec64 *boot_offset)
{
	read_persistent_clock64(wall_time);
	*boot_offset = ns_to_timespec64(local_clock());
}

static __init void tkd_basic_setup(struct tk_data *tkd, enum timekeeper_ids tk_id, bool valid)
{
	raw_spin_lock_init(&tkd->lock);
	seqcount_raw_spinlock_init(&tkd->seq, &tkd->lock);
	tkd->timekeeper.id = tkd->shadow_timekeeper.id = tk_id;
	tkd->timekeeper.clock_valid = tkd->shadow_timekeeper.clock_valid = valid;
}

/*
 * Flag reflecting whether timekeeping_resume() has injected sleeptime.
 *
 * The flag starts off as false and is only set when a suspend reaches
 * timekeeping_suspend(). timekeeping_resume() sets it to false when the
 * timekeeper clocksource is not stopping across suspend and has been
 * used to update sleep time. If the timekeeper clocksource has stopped
 * then the flag stays true and is used by the RTC resume code to decide
 * whether sleeptime must be injected and, if so, the flag is cleared
 * afterwards.
 *
 * If a suspend fails before reaching timekeeping_resume() then the flag
 * stays false and prevents erroneous sleeptime injection.
 */
static bool suspend_timing_needed;

/* Flag for if there is a persistent clock on this platform */
static bool persistent_clock_exists;

/*
 * timekeeping_init - Initializes the clocksource and common timekeeping values
 */
void __init timekeeping_init(void)
{
	struct timespec64 wall_time, boot_offset, wall_to_mono;
	struct timekeeper *tks = &tk_core.shadow_timekeeper;
	struct clocksource *clock;

	tkd_basic_setup(&tk_core, TIMEKEEPER_CORE, true);
	tk_aux_setup();

	read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
	if (timespec64_valid_settod(&wall_time) &&
	    timespec64_to_ns(&wall_time) > 0) {
		persistent_clock_exists = true;
	} else if (timespec64_to_ns(&wall_time) != 0) {
		pr_warn("Persistent clock returned invalid value");
		wall_time = (struct timespec64){0};
	}

	if (timespec64_compare(&wall_time, &boot_offset) < 0)
		boot_offset = (struct timespec64){0};

	/*
	 * We want to set wall_to_mono, so the following is true:
	 * wall time + wall_to_mono = boot time
	 */
	wall_to_mono = timespec64_sub(boot_offset, wall_time);

	guard(raw_spinlock_irqsave)(&tk_core.lock);

	ntp_init();

	clock = clocksource_default_clock();
	if (clock->enable)
		clock->enable(clock);
	tk_setup_internals(tks, clock);

	tk_set_xtime(tks, &wall_time);
	tks->raw_sec = 0;

	tk_set_wall_to_mono(tks, wall_to_mono);

	timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET);
}

/* time in seconds when suspend began for persistent clock */
static struct timespec64 timekeeping_suspend_time;

/**
 * __timekeeping_inject_sleeptime - Internal function to add sleep interval
 * @tk:		Pointer to the timekeeper to be updated
 * @delta:	Pointer to the delta value in timespec64 format
 *
 * Takes a timespec offset measuring a suspend interval and properly
 * adds the sleep offset to the timekeeping variables.
1835 */
1836 static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
1837 const struct timespec64 *delta)
1838 {
1839 if (!timespec64_valid_strict(delta)) {
1840 printk_deferred(KERN_WARNING
1841 "__timekeeping_inject_sleeptime: Invalid "
1842 "sleep delta value!\n");
1843 return;
1844 }
1845 tk_xtime_add(tk, delta);
1846 tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
1847 tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
1848 tk_debug_account_sleep_time(delta);
1849 }
1850
1851 #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE)
1852 /*
1853 * We have three kinds of time sources to use for sleep time
1854 * injection, the preference order is:
1855 * 1) non-stop clocksource
1856 * 2) persistent clock (ie: RTC accessible when irqs are off)
1857 * 3) RTC
1858 *
1859 * 1) and 2) are used by timekeeping, 3) by the RTC subsystem.
1860 * If the system has neither 1) nor 2), 3) is used as a last resort.
1861 *
1862 *
1863 * If timekeeping has injected sleeptime via either 1) or 2),
1864 * 3) becomes unnecessary, so in this case we don't need to call
1865 * rtc_resume(), and this is what timekeeping_rtc_skipresume()
1866 * means.
1867 */
1868 bool timekeeping_rtc_skipresume(void)
1869 {
1870 return !suspend_timing_needed;
1871 }
1872
1873 /*
1874 * Whether 1) can be used is only determined in timekeeping_resume(),
1875 * which is invoked after rtc_suspend(), so we cannot safely skip
1876 * rtc_suspend() just because the system has 1).
1877 *
1878 * But if the system has 2), 2) will definitely be used, so in this
1879 * case we don't need to call rtc_suspend(), and this is what
1880 * timekeeping_rtc_skipsuspend() means.
1881 */
1882 bool timekeeping_rtc_skipsuspend(void)
1883 {
1884 return persistent_clock_exists;
1885 }
1886
1887 /**
1888 * timekeeping_inject_sleeptime64 - Adds suspend interval to timekeeping values
1889 * @delta: pointer to a timespec64 delta value
1890 *
1891 * This hook is for architectures that cannot support read_persistent_clock64
1892 * because their RTC/persistent clock is only accessible when irqs are enabled,
1893 * and that also don't have an effective nonstop clocksource.
1894 *
1895 * This function should only be called by rtc_resume(), and allows
1896 * a suspend offset to be injected into the timekeeping values.
1897 */
1898 void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
1899 {
1900 scoped_guard(raw_spinlock_irqsave, &tk_core.lock) {
1901 struct timekeeper *tks = &tk_core.shadow_timekeeper;
1902
1903 suspend_timing_needed = false;
1904 timekeeping_forward_now(tks);
1905 __timekeeping_inject_sleeptime(tks, delta);
1906 timekeeping_update_from_shadow(&tk_core, TK_UPDATE_ALL);
1907 }
1908
1909 /* Signal hrtimers about time change */
1910 clock_was_set(CLOCK_SET_WALL | CLOCK_SET_BOOT);
1911 }
1912 #endif
1913
1914 /**
1915 * timekeeping_resume - Resumes the generic timekeeping subsystem.
1916 */
1917 void timekeeping_resume(void)
1918 {
1919 struct timekeeper *tks = &tk_core.shadow_timekeeper;
1920 struct clocksource *clock = tks->tkr_mono.clock;
1921 struct timespec64 ts_new, ts_delta;
1922 bool inject_sleeptime = false;
1923 u64 cycle_now, nsec;
1924 unsigned long flags;
1925
1926 read_persistent_clock64(&ts_new);
1927
1928 clockevents_resume();
1929 clocksource_resume();
1930
1931 raw_spin_lock_irqsave(&tk_core.lock, flags);
1932
1933 /*
1934 * After system resumes, we need to calculate the suspended time and
1935 * compensate it for the OS time.
There are 3 sources that could be 1936 * used: Nonstop clocksource during suspend, persistent clock and rtc 1937 * device. 1938 * 1939 * One specific platform may have 1 or 2 or all of them, and the 1940 * preference will be: 1941 * suspend-nonstop clocksource -> persistent clock -> rtc 1942 * The less preferred source will only be tried if there is no better 1943 * usable source. The rtc part is handled separately in rtc core code. 1944 */ 1945 cycle_now = tk_clock_read(&tks->tkr_mono); 1946 nsec = clocksource_stop_suspend_timing(clock, cycle_now); 1947 if (nsec > 0) { 1948 ts_delta = ns_to_timespec64(nsec); 1949 inject_sleeptime = true; 1950 } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) { 1951 ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time); 1952 inject_sleeptime = true; 1953 } 1954 1955 if (inject_sleeptime) { 1956 suspend_timing_needed = false; 1957 __timekeeping_inject_sleeptime(tks, &ts_delta); 1958 } 1959 1960 /* Re-base the last cycle value */ 1961 tks->tkr_mono.cycle_last = cycle_now; 1962 tks->tkr_raw.cycle_last = cycle_now; 1963 1964 tks->ntp_error = 0; 1965 timekeeping_suspended = 0; 1966 timekeeping_update_from_shadow(&tk_core, TK_CLOCK_WAS_SET); 1967 raw_spin_unlock_irqrestore(&tk_core.lock, flags); 1968 1969 touch_softlockup_watchdog(); 1970 1971 /* Resume the clockevent device(s) and hrtimers */ 1972 tick_resume(); 1973 /* Notify timerfd as resume is equivalent to clock_was_set() */ 1974 timerfd_resume(); 1975 } 1976 1977 int timekeeping_suspend(void) 1978 { 1979 struct timekeeper *tks = &tk_core.shadow_timekeeper; 1980 struct timespec64 delta, delta_delta; 1981 static struct timespec64 old_delta; 1982 struct clocksource *curr_clock; 1983 unsigned long flags; 1984 u64 cycle_now; 1985 1986 read_persistent_clock64(&timekeeping_suspend_time); 1987 1988 /* 1989 * On some systems the persistent_clock can not be detected at 1990 * timekeeping_init by its return value, so if we see a valid 1991 * value returned, update the persistent_clock_exists flag. 1992 */ 1993 if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec) 1994 persistent_clock_exists = true; 1995 1996 suspend_timing_needed = true; 1997 1998 raw_spin_lock_irqsave(&tk_core.lock, flags); 1999 timekeeping_forward_now(tks); 2000 timekeeping_suspended = 1; 2001 2002 /* 2003 * Since we've called forward_now, cycle_last stores the value 2004 * just read from the current clocksource. Save this to potentially 2005 * use in suspend timing. 2006 */ 2007 curr_clock = tks->tkr_mono.clock; 2008 cycle_now = tks->tkr_mono.cycle_last; 2009 clocksource_start_suspend_timing(curr_clock, cycle_now); 2010 2011 if (persistent_clock_exists) { 2012 /* 2013 * To avoid drift caused by repeated suspend/resumes, 2014 * which each can add ~1 second drift error, 2015 * try to compensate so the difference in system time 2016 * and persistent_clock time stays close to constant. 2017 */ 2018 delta = timespec64_sub(tk_xtime(tks), timekeeping_suspend_time); 2019 delta_delta = timespec64_sub(delta, old_delta); 2020 if (abs(delta_delta.tv_sec) >= 2) { 2021 /* 2022 * if delta_delta is too large, assume time correction 2023 * has occurred and set old_delta to the current delta. 
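 * For scale (made-up numbers, not from a real system): if xtime minus
 * the persistent clock read 100.4s at the previous suspend and reads
 * 100.9s now, delta_delta is 0.5s, below the 2s cutoff, so the 0.5s is
 * folded into timekeeping_suspend_time in the else path below, keeping
 * the difference between system time and the persistent clock roughly
 * constant. A 10s delta_delta would instead be taken as a clock step
 * and only reset old_delta here.
 *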
2024 */
2025 old_delta = delta;
2026 } else {
2027 /* Otherwise adjust timekeeping_suspend_time to compensate */
2028 timekeeping_suspend_time =
2029 timespec64_add(timekeeping_suspend_time, delta_delta);
2030 }
2031 }
2032
2033 timekeeping_update_from_shadow(&tk_core, 0);
2034 halt_fast_timekeeper(tks);
2035 raw_spin_unlock_irqrestore(&tk_core.lock, flags);
2036
2037 tick_suspend();
2038 clocksource_suspend();
2039 clockevents_suspend();
2040
2041 return 0;
2042 }
2043
2044 /* syscore resume/suspend bits for timekeeping */
2045 static struct syscore_ops timekeeping_syscore_ops = {
2046 .resume = timekeeping_resume,
2047 .suspend = timekeeping_suspend,
2048 };
2049
2050 static int __init timekeeping_init_ops(void)
2051 {
2052 register_syscore_ops(&timekeeping_syscore_ops);
2053 return 0;
2054 }
2055 device_initcall(timekeeping_init_ops);
2056
2057 /*
2058 * Apply a multiplier adjustment to the timekeeper
2059 */
2060 static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
2061 s64 offset,
2062 s32 mult_adj)
2063 {
2064 s64 interval = tk->cycle_interval;
2065
2066 if (mult_adj == 0) {
2067 return;
2068 } else if (mult_adj == -1) {
2069 interval = -interval;
2070 offset = -offset;
2071 } else if (mult_adj != 1) {
2072 interval *= mult_adj;
2073 offset *= mult_adj;
2074 }
2075
2076 /*
2077 * So the following can be confusing.
2078 *
2079 * To keep things simple, let's assume mult_adj == 1 for now.
2080 *
2081 * When mult_adj != 1, remember that the interval and offset values
2082 * have been appropriately scaled so the math is the same.
2083 *
2084 * The basic idea here is that we're increasing the multiplier
2085 * by one, which causes the xtime_interval to be incremented by
2086 * one cycle_interval. This is because:
2087 * xtime_interval = cycle_interval * mult
2088 * So if mult is being incremented by one:
2089 * xtime_interval = cycle_interval * (mult + 1)
2090 * It's the same as:
2091 * xtime_interval = (cycle_interval * mult) + cycle_interval
2092 * Which can be shortened to:
2093 * xtime_interval += cycle_interval
2094 *
2095 * So offset stores the non-accumulated cycles. Thus the current
2096 * time (in shifted nanoseconds) is:
2097 * now = (offset * adj) + xtime_nsec
2098 * Now, even though we're adjusting the clock frequency, we have
2099 * to keep time consistent. In other words, we can't jump back
2100 * in time, and we also want to avoid jumping forward in time.
2101 *
2102 * So given the same offset value, we need the time to be the same
2103 * both before and after the freq adjustment.
2104 * now = (offset * adj_1) + xtime_nsec_1 2105 * now = (offset * adj_2) + xtime_nsec_2 2106 * So: 2107 * (offset * adj_1) + xtime_nsec_1 = 2108 * (offset * adj_2) + xtime_nsec_2 2109 * And we know: 2110 * adj_2 = adj_1 + 1 2111 * So: 2112 * (offset * adj_1) + xtime_nsec_1 = 2113 * (offset * (adj_1+1)) + xtime_nsec_2 2114 * (offset * adj_1) + xtime_nsec_1 = 2115 * (offset * adj_1) + offset + xtime_nsec_2 2116 * Canceling the sides: 2117 * xtime_nsec_1 = offset + xtime_nsec_2 2118 * Which gives us: 2119 * xtime_nsec_2 = xtime_nsec_1 - offset 2120 * Which simplifies to: 2121 * xtime_nsec -= offset 2122 */ 2123 if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) { 2124 /* NTP adjustment caused clocksource mult overflow */ 2125 WARN_ON_ONCE(1); 2126 return; 2127 } 2128 2129 tk->tkr_mono.mult += mult_adj; 2130 tk->xtime_interval += interval; 2131 tk->tkr_mono.xtime_nsec -= offset; 2132 } 2133 2134 /* 2135 * Adjust the timekeeper's multiplier to the correct frequency 2136 * and also to reduce the accumulated error value. 2137 */ 2138 static void timekeeping_adjust(struct timekeeper *tk, s64 offset) 2139 { 2140 u64 ntp_tl = ntp_tick_length(tk->id); 2141 u32 mult; 2142 2143 /* 2144 * Determine the multiplier from the current NTP tick length. 2145 * Avoid expensive division when the tick length doesn't change. 2146 */ 2147 if (likely(tk->ntp_tick == ntp_tl)) { 2148 mult = tk->tkr_mono.mult - tk->ntp_err_mult; 2149 } else { 2150 tk->ntp_tick = ntp_tl; 2151 mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) - 2152 tk->xtime_remainder, tk->cycle_interval); 2153 } 2154 2155 /* 2156 * If the clock is behind the NTP time, increase the multiplier by 1 2157 * to catch up with it. If it's ahead and there was a remainder in the 2158 * tick division, the clock will slow down. Otherwise it will stay 2159 * ahead until the tick length changes to a non-divisible value. 2160 */ 2161 tk->ntp_err_mult = tk->ntp_error > 0 ? 1 : 0; 2162 mult += tk->ntp_err_mult; 2163 2164 timekeeping_apply_adjustment(tk, offset, mult - tk->tkr_mono.mult); 2165 2166 if (unlikely(tk->tkr_mono.clock->maxadj && 2167 (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult) 2168 > tk->tkr_mono.clock->maxadj))) { 2169 printk_once(KERN_WARNING 2170 "Adjusting %s more than 11%% (%ld vs %ld)\n", 2171 tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult, 2172 (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj); 2173 } 2174 2175 /* 2176 * It may be possible that when we entered this function, xtime_nsec 2177 * was very small. Further, if we're slightly speeding the clocksource 2178 * in the code above, its possible the required corrective factor to 2179 * xtime_nsec could cause it to underflow. 2180 * 2181 * Now, since we have already accumulated the second and the NTP 2182 * subsystem has been notified via second_overflow(), we need to skip 2183 * the next update. 2184 */ 2185 if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) { 2186 tk->tkr_mono.xtime_nsec += (u64)NSEC_PER_SEC << 2187 tk->tkr_mono.shift; 2188 tk->xtime_sec--; 2189 tk->skip_second_overflow = 1; 2190 } 2191 } 2192 2193 /* 2194 * accumulate_nsecs_to_secs - Accumulates nsecs into secs 2195 * 2196 * Helper function that accumulates the nsecs greater than a second 2197 * from the xtime_nsec field to the xtime_secs field. 2198 * It also calls into the NTP code to handle leapsecond processing. 
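 *
 * Hedged example of the leap path (the sign convention is inferred from
 * the code below): when NTP has armed a leap second insertion,
 * second_overflow() is expected to return -1 for the affected second, so
 * xtime_sec is pulled back by one, wall_to_monotonic grows by one second
 * and the TAI offset increases by one; the UTC second 23:59:59 repeats
 * while CLOCK_MONOTONIC and CLOCK_TAI keep advancing linearly.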
2199 */ 2200 static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) 2201 { 2202 u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift; 2203 unsigned int clock_set = 0; 2204 2205 while (tk->tkr_mono.xtime_nsec >= nsecps) { 2206 int leap; 2207 2208 tk->tkr_mono.xtime_nsec -= nsecps; 2209 tk->xtime_sec++; 2210 2211 /* 2212 * Skip NTP update if this second was accumulated before, 2213 * i.e. xtime_nsec underflowed in timekeeping_adjust() 2214 */ 2215 if (unlikely(tk->skip_second_overflow)) { 2216 tk->skip_second_overflow = 0; 2217 continue; 2218 } 2219 2220 /* Figure out if its a leap sec and apply if needed */ 2221 leap = second_overflow(tk->id, tk->xtime_sec); 2222 if (unlikely(leap)) { 2223 struct timespec64 ts; 2224 2225 tk->xtime_sec += leap; 2226 2227 ts.tv_sec = leap; 2228 ts.tv_nsec = 0; 2229 tk_set_wall_to_mono(tk, 2230 timespec64_sub(tk->wall_to_monotonic, ts)); 2231 2232 __timekeeping_set_tai_offset(tk, tk->tai_offset - leap); 2233 2234 clock_set = TK_CLOCK_WAS_SET; 2235 } 2236 } 2237 return clock_set; 2238 } 2239 2240 /* 2241 * logarithmic_accumulation - shifted accumulation of cycles 2242 * 2243 * This functions accumulates a shifted interval of cycles into 2244 * a shifted interval nanoseconds. Allows for O(log) accumulation 2245 * loop. 2246 * 2247 * Returns the unconsumed cycles. 2248 */ 2249 static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset, 2250 u32 shift, unsigned int *clock_set) 2251 { 2252 u64 interval = tk->cycle_interval << shift; 2253 u64 snsec_per_sec; 2254 2255 /* If the offset is smaller than a shifted interval, do nothing */ 2256 if (offset < interval) 2257 return offset; 2258 2259 /* Accumulate one shifted interval */ 2260 offset -= interval; 2261 tk->tkr_mono.cycle_last += interval; 2262 tk->tkr_raw.cycle_last += interval; 2263 2264 tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift; 2265 *clock_set |= accumulate_nsecs_to_secs(tk); 2266 2267 /* Accumulate raw time */ 2268 tk->tkr_raw.xtime_nsec += tk->raw_interval << shift; 2269 snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift; 2270 while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) { 2271 tk->tkr_raw.xtime_nsec -= snsec_per_sec; 2272 tk->raw_sec++; 2273 } 2274 2275 /* Accumulate error between NTP and clock interval */ 2276 tk->ntp_error += tk->ntp_tick << shift; 2277 tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) << 2278 (tk->ntp_error_shift + shift); 2279 2280 return offset; 2281 } 2282 2283 /* 2284 * timekeeping_advance - Updates the timekeeper to the current time and 2285 * current NTP tick length 2286 */ 2287 static bool __timekeeping_advance(struct tk_data *tkd, enum timekeeping_adv_mode mode) 2288 { 2289 struct timekeeper *tk = &tkd->shadow_timekeeper; 2290 struct timekeeper *real_tk = &tkd->timekeeper; 2291 unsigned int clock_set = 0; 2292 int shift = 0, maxshift; 2293 u64 offset, orig_offset; 2294 2295 /* Make sure we're fully resumed: */ 2296 if (unlikely(timekeeping_suspended)) 2297 return false; 2298 2299 offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), 2300 tk->tkr_mono.cycle_last, tk->tkr_mono.mask, 2301 tk->tkr_mono.clock->max_raw_delta); 2302 orig_offset = offset; 2303 /* Check if there's really nothing to do */ 2304 if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK) 2305 return false; 2306 2307 /* 2308 * With NO_HZ we may have to accumulate many cycle_intervals 2309 * (think "ticks") worth of time at once. 
To do this efficiently,
2310 * we calculate the largest doubling multiple of cycle_intervals
2311 * that is smaller than the offset. We then accumulate that
2312 * chunk in one go, and then try to consume the next smaller
2313 * doubled multiple.
2314 */
2315 shift = ilog2(offset) - ilog2(tk->cycle_interval);
2316 shift = max(0, shift);
2317 /* Bound shift to one less than what overflows tick_length */
2318 maxshift = (64 - (ilog2(ntp_tick_length(tk->id)) + 1)) - 1;
2319 shift = min(shift, maxshift);
2320 while (offset >= tk->cycle_interval) {
2321 offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
2322 if (offset < tk->cycle_interval << shift)
2323 shift--;
2324 }
2325
2326 /* Adjust the multiplier to correct NTP error */
2327 timekeeping_adjust(tk, offset);
2328
2329 /*
2330 * Finally, make sure that after the rounding
2331 * xtime_nsec isn't larger than NSEC_PER_SEC
2332 */
2333 clock_set |= accumulate_nsecs_to_secs(tk);
2334
2335 /*
2336 * To avoid inconsistencies caused by adjtimex TK_ADV_FREQ calls
2337 * making small negative adjustments to the base xtime_nsec
2338 * value, only update the coarse clocks if we accumulated time
2339 */
2340 if (orig_offset != offset)
2341 tk_update_coarse_nsecs(tk);
2342
2343 timekeeping_update_from_shadow(tkd, clock_set);
2344
2345 return !!clock_set;
2346 }
2347
2348 static bool timekeeping_advance(enum timekeeping_adv_mode mode)
2349 {
2350 guard(raw_spinlock_irqsave)(&tk_core.lock);
2351 return __timekeeping_advance(&tk_core, mode);
2352 }
2353
2354 /**
2355 * update_wall_time - Uses the current clocksource to increment the wall time
2356 *
2357 * It also updates the enabled auxiliary clock timekeepers.
2358 */
2359 void update_wall_time(void)
2360 {
2361 if (timekeeping_advance(TK_ADV_TICK))
2362 clock_was_set_delayed();
2363 tk_aux_advance();
2364 }
2365
2366 /**
2367 * getboottime64 - Return the real time of system boot.
2368 * @ts: pointer to the timespec64 to be set
2369 *
2370 * Returns the wall-time of boot in a timespec64.
2371 *
2372 * This is based on the wall_to_monotonic offset and the total suspend
2373 * time. Calls to settimeofday will affect the value returned (which
2374 * basically means that however wrong your real time clock is at boot time,
2375 * you get the right time here).
2376 */
2377 void getboottime64(struct timespec64 *ts)
2378 {
2379 struct timekeeper *tk = &tk_core.timekeeper;
2380 ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);
2381
2382 *ts = ktime_to_timespec64(t);
2383 }
2384 EXPORT_SYMBOL_GPL(getboottime64);
2385
2386 void ktime_get_coarse_real_ts64(struct timespec64 *ts)
2387 {
2388 struct timekeeper *tk = &tk_core.timekeeper;
2389 unsigned int seq;
2390
2391 do {
2392 seq = read_seqcount_begin(&tk_core.seq);
2393
2394 *ts = tk_xtime_coarse(tk);
2395 } while (read_seqcount_retry(&tk_core.seq, seq));
2396 }
2397 EXPORT_SYMBOL(ktime_get_coarse_real_ts64);
2398
2399 /**
2400 * ktime_get_coarse_real_ts64_mg - return the later of coarse-grained time or floor
2401 * @ts: timespec64 to be filled
2402 *
2403 * Fetch the global mg_floor value, convert it to realtime and compare it
2404 * to the current coarse-grained time. Fill @ts with whichever is
2405 * latest. Note that this is a filesystem-specific interface and should be
2406 * avoided outside of that context.
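 *
 * Illustrative sketch of the intended calling pattern (assumed caller, not
 * code from this file; "ctime" stands in for the inode's current ctime):
 *
 *	struct timespec64 now;
 *
 *	ktime_get_coarse_real_ts64_mg(&now);
 *	if (timespec64_compare(&now, &ctime) <= 0)
 *		ktime_get_real_ts64_mg(&now);
 *
 * i.e. fall back to a fine-grained stamp, and thereby advance mg_floor,
 * only when the coarse stamp would not move ctime forward.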
2407 */ 2408 void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts) 2409 { 2410 struct timekeeper *tk = &tk_core.timekeeper; 2411 u64 floor = atomic64_read(&mg_floor); 2412 ktime_t f_real, offset, coarse; 2413 unsigned int seq; 2414 2415 do { 2416 seq = read_seqcount_begin(&tk_core.seq); 2417 *ts = tk_xtime_coarse(tk); 2418 offset = tk_core.timekeeper.offs_real; 2419 } while (read_seqcount_retry(&tk_core.seq, seq)); 2420 2421 coarse = timespec64_to_ktime(*ts); 2422 f_real = ktime_add(floor, offset); 2423 if (ktime_after(f_real, coarse)) 2424 *ts = ktime_to_timespec64(f_real); 2425 } 2426 2427 /** 2428 * ktime_get_real_ts64_mg - attempt to update floor value and return result 2429 * @ts: pointer to the timespec to be set 2430 * 2431 * Get a monotonic fine-grained time value and attempt to swap it into 2432 * mg_floor. If that succeeds then accept the new floor value. If it fails 2433 * then another task raced in during the interim time and updated the 2434 * floor. Since any update to the floor must be later than the previous 2435 * floor, either outcome is acceptable. 2436 * 2437 * Typically this will be called after calling ktime_get_coarse_real_ts64_mg(), 2438 * and determining that the resulting coarse-grained timestamp did not effect 2439 * a change in ctime. Any more recent floor value would effect a change to 2440 * ctime, so there is no need to retry the atomic64_try_cmpxchg() on failure. 2441 * 2442 * @ts will be filled with the latest floor value, regardless of the outcome of 2443 * the cmpxchg. Note that this is a filesystem specific interface and should be 2444 * avoided outside of that context. 2445 */ 2446 void ktime_get_real_ts64_mg(struct timespec64 *ts) 2447 { 2448 struct timekeeper *tk = &tk_core.timekeeper; 2449 ktime_t old = atomic64_read(&mg_floor); 2450 ktime_t offset, mono; 2451 unsigned int seq; 2452 u64 nsecs; 2453 2454 do { 2455 seq = read_seqcount_begin(&tk_core.seq); 2456 2457 ts->tv_sec = tk->xtime_sec; 2458 mono = tk->tkr_mono.base; 2459 nsecs = timekeeping_get_ns(&tk->tkr_mono); 2460 offset = tk_core.timekeeper.offs_real; 2461 } while (read_seqcount_retry(&tk_core.seq, seq)); 2462 2463 mono = ktime_add_ns(mono, nsecs); 2464 2465 /* 2466 * Attempt to update the floor with the new time value. As any 2467 * update must be later then the existing floor, and would effect 2468 * a change to ctime from the perspective of the current task, 2469 * accept the resulting floor value regardless of the outcome of 2470 * the swap. 2471 */ 2472 if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) { 2473 ts->tv_nsec = 0; 2474 timespec64_add_ns(ts, nsecs); 2475 timekeeping_inc_mg_floor_swaps(); 2476 } else { 2477 /* 2478 * Another task changed mg_floor since "old" was fetched. 2479 * "old" has been updated with the latest value of "mg_floor". 2480 * That value is newer than the previous floor value, which 2481 * is enough to effect a change to ctime. Accept it. 
2482 */ 2483 *ts = ktime_to_timespec64(ktime_add(old, offset)); 2484 } 2485 } 2486 2487 void ktime_get_coarse_ts64(struct timespec64 *ts) 2488 { 2489 struct timekeeper *tk = &tk_core.timekeeper; 2490 struct timespec64 now, mono; 2491 unsigned int seq; 2492 2493 do { 2494 seq = read_seqcount_begin(&tk_core.seq); 2495 2496 now = tk_xtime_coarse(tk); 2497 mono = tk->wall_to_monotonic; 2498 } while (read_seqcount_retry(&tk_core.seq, seq)); 2499 2500 set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec, 2501 now.tv_nsec + mono.tv_nsec); 2502 } 2503 EXPORT_SYMBOL(ktime_get_coarse_ts64); 2504 2505 /* 2506 * Must hold jiffies_lock 2507 */ 2508 void do_timer(unsigned long ticks) 2509 { 2510 jiffies_64 += ticks; 2511 calc_global_load(); 2512 } 2513 2514 /** 2515 * ktime_get_update_offsets_now - hrtimer helper 2516 * @cwsseq: pointer to check and store the clock was set sequence number 2517 * @offs_real: pointer to storage for monotonic -> realtime offset 2518 * @offs_boot: pointer to storage for monotonic -> boottime offset 2519 * @offs_tai: pointer to storage for monotonic -> clock tai offset 2520 * 2521 * Returns current monotonic time and updates the offsets if the 2522 * sequence number in @cwsseq and timekeeper.clock_was_set_seq are 2523 * different. 2524 * 2525 * Called from hrtimer_interrupt() or retrigger_next_event() 2526 */ 2527 ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, 2528 ktime_t *offs_boot, ktime_t *offs_tai) 2529 { 2530 struct timekeeper *tk = &tk_core.timekeeper; 2531 unsigned int seq; 2532 ktime_t base; 2533 u64 nsecs; 2534 2535 do { 2536 seq = read_seqcount_begin(&tk_core.seq); 2537 2538 base = tk->tkr_mono.base; 2539 nsecs = timekeeping_get_ns(&tk->tkr_mono); 2540 base = ktime_add_ns(base, nsecs); 2541 2542 if (*cwsseq != tk->clock_was_set_seq) { 2543 *cwsseq = tk->clock_was_set_seq; 2544 *offs_real = tk->offs_real; 2545 *offs_boot = tk->offs_boot; 2546 *offs_tai = tk->offs_tai; 2547 } 2548 2549 /* Handle leapsecond insertion adjustments */ 2550 if (unlikely(base >= tk->next_leap_ktime)) 2551 *offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0)); 2552 2553 } while (read_seqcount_retry(&tk_core.seq, seq)); 2554 2555 return base; 2556 } 2557 2558 /* 2559 * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex 2560 */ 2561 static int timekeeping_validate_timex(const struct __kernel_timex *txc, bool aux_clock) 2562 { 2563 if (txc->modes & ADJ_ADJTIME) { 2564 /* singleshot must not be used with any other mode bits */ 2565 if (!(txc->modes & ADJ_OFFSET_SINGLESHOT)) 2566 return -EINVAL; 2567 if (!(txc->modes & ADJ_OFFSET_READONLY) && 2568 !capable(CAP_SYS_TIME)) 2569 return -EPERM; 2570 } else { 2571 /* In order to modify anything, you gotta be super-user! */ 2572 if (txc->modes && !capable(CAP_SYS_TIME)) 2573 return -EPERM; 2574 /* 2575 * if the quartz is off by more than 10% then 2576 * something is VERY wrong! 2577 */ 2578 if (txc->modes & ADJ_TICK && 2579 (txc->tick < 900000/USER_HZ || 2580 txc->tick > 1100000/USER_HZ)) 2581 return -EINVAL; 2582 } 2583 2584 if (txc->modes & ADJ_SETOFFSET) { 2585 /* In order to inject time, you gotta be super-user! */ 2586 if (!capable(CAP_SYS_TIME)) 2587 return -EPERM; 2588 2589 /* 2590 * Validate if a timespec/timeval used to inject a time 2591 * offset is valid. Offsets can be positive or negative, so 2592 * we don't check tv_sec. 
The value of the timeval/timespec 2593 * is the sum of its fields,but *NOTE*: 2594 * The field tv_usec/tv_nsec must always be non-negative and 2595 * we can't have more nanoseconds/microseconds than a second. 2596 */ 2597 if (txc->time.tv_usec < 0) 2598 return -EINVAL; 2599 2600 if (txc->modes & ADJ_NANO) { 2601 if (txc->time.tv_usec >= NSEC_PER_SEC) 2602 return -EINVAL; 2603 } else { 2604 if (txc->time.tv_usec >= USEC_PER_SEC) 2605 return -EINVAL; 2606 } 2607 } 2608 2609 /* 2610 * Check for potential multiplication overflows that can 2611 * only happen on 64-bit systems: 2612 */ 2613 if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) { 2614 if (LLONG_MIN / PPM_SCALE > txc->freq) 2615 return -EINVAL; 2616 if (LLONG_MAX / PPM_SCALE < txc->freq) 2617 return -EINVAL; 2618 } 2619 2620 if (aux_clock) { 2621 /* Auxiliary clocks are similar to TAI and do not have leap seconds */ 2622 if (txc->status & (STA_INS | STA_DEL)) 2623 return -EINVAL; 2624 2625 /* No TAI offset setting */ 2626 if (txc->modes & ADJ_TAI) 2627 return -EINVAL; 2628 2629 /* No PPS support either */ 2630 if (txc->status & (STA_PPSFREQ | STA_PPSTIME)) 2631 return -EINVAL; 2632 } 2633 2634 return 0; 2635 } 2636 2637 /** 2638 * random_get_entropy_fallback - Returns the raw clock source value, 2639 * used by random.c for platforms with no valid random_get_entropy(). 2640 */ 2641 unsigned long random_get_entropy_fallback(void) 2642 { 2643 struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono; 2644 struct clocksource *clock = READ_ONCE(tkr->clock); 2645 2646 if (unlikely(timekeeping_suspended || !clock)) 2647 return 0; 2648 return clock->read(clock); 2649 } 2650 EXPORT_SYMBOL_GPL(random_get_entropy_fallback); 2651 2652 struct adjtimex_result { 2653 struct audit_ntp_data ad; 2654 struct timespec64 delta; 2655 bool clock_set; 2656 }; 2657 2658 static int __do_adjtimex(struct tk_data *tkd, struct __kernel_timex *txc, 2659 struct adjtimex_result *result) 2660 { 2661 struct timekeeper *tks = &tkd->shadow_timekeeper; 2662 bool aux_clock = !timekeeper_is_core_tk(tks); 2663 struct timespec64 ts; 2664 s32 orig_tai, tai; 2665 int ret; 2666 2667 /* Validate the data before disabling interrupts */ 2668 ret = timekeeping_validate_timex(txc, aux_clock); 2669 if (ret) 2670 return ret; 2671 add_device_randomness(txc, sizeof(*txc)); 2672 2673 if (!aux_clock) 2674 ktime_get_real_ts64(&ts); 2675 else 2676 tk_get_aux_ts64(tkd->timekeeper.id, &ts); 2677 2678 add_device_randomness(&ts, sizeof(ts)); 2679 2680 guard(raw_spinlock_irqsave)(&tkd->lock); 2681 2682 if (!tks->clock_valid) 2683 return -ENODEV; 2684 2685 if (txc->modes & ADJ_SETOFFSET) { 2686 result->delta.tv_sec = txc->time.tv_sec; 2687 result->delta.tv_nsec = txc->time.tv_usec; 2688 if (!(txc->modes & ADJ_NANO)) 2689 result->delta.tv_nsec *= 1000; 2690 ret = __timekeeping_inject_offset(tkd, &result->delta); 2691 if (ret) 2692 return ret; 2693 result->clock_set = true; 2694 } 2695 2696 orig_tai = tai = tks->tai_offset; 2697 ret = ntp_adjtimex(tks->id, txc, &ts, &tai, &result->ad); 2698 2699 if (tai != orig_tai) { 2700 __timekeeping_set_tai_offset(tks, tai); 2701 timekeeping_update_from_shadow(tkd, TK_CLOCK_WAS_SET); 2702 result->clock_set = true; 2703 } else { 2704 tk_update_leap_state_all(&tk_core); 2705 } 2706 2707 /* Update the multiplier immediately if frequency was set directly */ 2708 if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK)) 2709 result->clock_set |= __timekeeping_advance(tkd, TK_ADV_FREQ); 2710 2711 return ret; 2712 } 2713 2714 /** 2715 * do_adjtimex() - Accessor function to 
NTP __do_adjtimex function 2716 * @txc: Pointer to kernel_timex structure containing NTP parameters 2717 */ 2718 int do_adjtimex(struct __kernel_timex *txc) 2719 { 2720 struct adjtimex_result result = { }; 2721 int ret; 2722 2723 ret = __do_adjtimex(&tk_core, txc, &result); 2724 if (ret < 0) 2725 return ret; 2726 2727 if (txc->modes & ADJ_SETOFFSET) 2728 audit_tk_injoffset(result.delta); 2729 2730 audit_ntp_log(&result.ad); 2731 2732 if (result.clock_set) 2733 clock_was_set(CLOCK_SET_WALL); 2734 2735 ntp_notify_cmos_timer(result.delta.tv_sec != 0); 2736 2737 return ret; 2738 } 2739 2740 /* 2741 * Invoked from NTP with the time keeper lock held, so lockless access is 2742 * fine. 2743 */ 2744 long ktime_get_ntp_seconds(unsigned int id) 2745 { 2746 return timekeeper_data[id].timekeeper.xtime_sec; 2747 } 2748 2749 #ifdef CONFIG_NTP_PPS 2750 /** 2751 * hardpps() - Accessor function to NTP __hardpps function 2752 * @phase_ts: Pointer to timespec64 structure representing phase timestamp 2753 * @raw_ts: Pointer to timespec64 structure representing raw timestamp 2754 */ 2755 void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts) 2756 { 2757 guard(raw_spinlock_irqsave)(&tk_core.lock); 2758 __hardpps(phase_ts, raw_ts); 2759 } 2760 EXPORT_SYMBOL(hardpps); 2761 #endif /* CONFIG_NTP_PPS */ 2762 2763 #ifdef CONFIG_POSIX_AUX_CLOCKS 2764 #include "posix-timers.h" 2765 2766 /* 2767 * Bitmap for the activated auxiliary timekeepers to allow lockless quick 2768 * checks in the hot paths without touching extra cache lines. If set, then 2769 * the state of the corresponding timekeeper has to be re-checked under 2770 * timekeeper::lock. 2771 */ 2772 static unsigned long aux_timekeepers; 2773 2774 static inline unsigned int clockid_to_tkid(unsigned int id) 2775 { 2776 return TIMEKEEPER_AUX_FIRST + id - CLOCK_AUX; 2777 } 2778 2779 static inline struct tk_data *aux_get_tk_data(clockid_t id) 2780 { 2781 if (!clockid_aux_valid(id)) 2782 return NULL; 2783 return &timekeeper_data[clockid_to_tkid(id)]; 2784 } 2785 2786 /* Invoked from timekeeping after a clocksource change */ 2787 static void tk_aux_update_clocksource(void) 2788 { 2789 unsigned long active = READ_ONCE(aux_timekeepers); 2790 unsigned int id; 2791 2792 for_each_set_bit(id, &active, BITS_PER_LONG) { 2793 struct tk_data *tkd = &timekeeper_data[id + TIMEKEEPER_AUX_FIRST]; 2794 struct timekeeper *tks = &tkd->shadow_timekeeper; 2795 2796 guard(raw_spinlock_irqsave)(&tkd->lock); 2797 if (!tks->clock_valid) 2798 continue; 2799 2800 timekeeping_forward_now(tks); 2801 tk_setup_internals(tks, tk_core.timekeeper.tkr_mono.clock); 2802 timekeeping_update_from_shadow(tkd, TK_UPDATE_ALL); 2803 } 2804 } 2805 2806 static void tk_aux_advance(void) 2807 { 2808 unsigned long active = READ_ONCE(aux_timekeepers); 2809 unsigned int id; 2810 2811 /* Lockless quick check to avoid extra cache lines */ 2812 for_each_set_bit(id, &active, BITS_PER_LONG) { 2813 struct tk_data *aux_tkd = &timekeeper_data[id + TIMEKEEPER_AUX_FIRST]; 2814 2815 guard(raw_spinlock)(&aux_tkd->lock); 2816 if (aux_tkd->shadow_timekeeper.clock_valid) 2817 __timekeeping_advance(aux_tkd, TK_ADV_TICK); 2818 } 2819 } 2820 2821 /** 2822 * ktime_get_aux - Get time for a AUX clock 2823 * @id: ID of the clock to read (CLOCK_AUX...) 
2824 * @kt: Pointer to ktime_t to store the time stamp 2825 * 2826 * Returns: True if the timestamp is valid, false otherwise 2827 */ 2828 bool ktime_get_aux(clockid_t id, ktime_t *kt) 2829 { 2830 struct tk_data *aux_tkd = aux_get_tk_data(id); 2831 struct timekeeper *aux_tk; 2832 unsigned int seq; 2833 ktime_t base; 2834 u64 nsecs; 2835 2836 WARN_ON(timekeeping_suspended); 2837 2838 if (!aux_tkd) 2839 return false; 2840 2841 aux_tk = &aux_tkd->timekeeper; 2842 do { 2843 seq = read_seqcount_begin(&aux_tkd->seq); 2844 if (!aux_tk->clock_valid) 2845 return false; 2846 2847 base = ktime_add(aux_tk->tkr_mono.base, aux_tk->offs_aux); 2848 nsecs = timekeeping_get_ns(&aux_tk->tkr_mono); 2849 } while (read_seqcount_retry(&aux_tkd->seq, seq)); 2850 2851 *kt = ktime_add_ns(base, nsecs); 2852 return true; 2853 } 2854 EXPORT_SYMBOL_GPL(ktime_get_aux); 2855 2856 /** 2857 * ktime_get_aux_ts64 - Get time for a AUX clock 2858 * @id: ID of the clock to read (CLOCK_AUX...) 2859 * @ts: Pointer to timespec64 to store the time stamp 2860 * 2861 * Returns: True if the timestamp is valid, false otherwise 2862 */ 2863 bool ktime_get_aux_ts64(clockid_t id, struct timespec64 *ts) 2864 { 2865 ktime_t now; 2866 2867 if (!ktime_get_aux(id, &now)) 2868 return false; 2869 *ts = ktime_to_timespec64(now); 2870 return true; 2871 } 2872 EXPORT_SYMBOL_GPL(ktime_get_aux_ts64); 2873 2874 static int aux_get_res(clockid_t id, struct timespec64 *tp) 2875 { 2876 if (!clockid_aux_valid(id)) 2877 return -ENODEV; 2878 2879 tp->tv_sec = 0; 2880 tp->tv_nsec = 1; 2881 return 0; 2882 } 2883 2884 static int aux_get_timespec(clockid_t id, struct timespec64 *tp) 2885 { 2886 return ktime_get_aux_ts64(id, tp) ? 0 : -ENODEV; 2887 } 2888 2889 static int aux_clock_set(const clockid_t id, const struct timespec64 *tnew) 2890 { 2891 struct tk_data *aux_tkd = aux_get_tk_data(id); 2892 struct timekeeper *aux_tks; 2893 ktime_t tnow, nsecs; 2894 2895 if (!timespec64_valid_settod(tnew)) 2896 return -EINVAL; 2897 if (!aux_tkd) 2898 return -ENODEV; 2899 2900 aux_tks = &aux_tkd->shadow_timekeeper; 2901 2902 guard(raw_spinlock_irq)(&aux_tkd->lock); 2903 if (!aux_tks->clock_valid) 2904 return -ENODEV; 2905 2906 /* Forward the timekeeper base time */ 2907 timekeeping_forward_now(aux_tks); 2908 /* 2909 * Get the updated base time. tkr_mono.base has not been 2910 * updated yet, so do that first. That makes the update 2911 * in timekeeping_update_from_shadow() redundant, but 2912 * that's harmless. After that @tnow can be calculated 2913 * by using tkr_mono::cycle_last, which has been set 2914 * by timekeeping_forward_now(). 2915 */ 2916 tk_update_ktime_data(aux_tks); 2917 nsecs = timekeeping_cycles_to_ns(&aux_tks->tkr_mono, aux_tks->tkr_mono.cycle_last); 2918 tnow = ktime_add(aux_tks->tkr_mono.base, nsecs); 2919 2920 /* 2921 * Calculate the new AUX offset as delta to @tnow ("monotonic"). 2922 * That avoids all the tk::xtime back and forth conversions as 2923 * xtime ("realtime") is not applicable for auxiliary clocks and 2924 * kept in sync with "monotonic". 
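 *
 * Numeric illustration (made-up values): if @tnow works out to 100s and
 * the requested @tnew is 1000s, offs_aux becomes 900s. ktime_get_aux()
 * then returns tkr_mono.base + offs_aux plus the freshly accumulated
 * nanoseconds, so the clock reads 1000s right after this call and
 * advances at roughly the system clocksource rate until it is tuned via
 * clock_adjtime(2).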
2925 */ 2926 aux_tks->offs_aux = ktime_sub(timespec64_to_ktime(*tnew), tnow); 2927 2928 timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); 2929 return 0; 2930 } 2931 2932 static int aux_clock_adj(const clockid_t id, struct __kernel_timex *txc) 2933 { 2934 struct tk_data *aux_tkd = aux_get_tk_data(id); 2935 struct adjtimex_result result = { }; 2936 2937 if (!aux_tkd) 2938 return -ENODEV; 2939 2940 /* 2941 * @result is ignored for now as there are neither hrtimers nor a 2942 * RTC related to auxiliary clocks for now. 2943 */ 2944 return __do_adjtimex(aux_tkd, txc, &result); 2945 } 2946 2947 const struct k_clock clock_aux = { 2948 .clock_getres = aux_get_res, 2949 .clock_get_timespec = aux_get_timespec, 2950 .clock_set = aux_clock_set, 2951 .clock_adj = aux_clock_adj, 2952 }; 2953 2954 static void aux_clock_enable(clockid_t id) 2955 { 2956 struct tk_read_base *tkr_raw = &tk_core.timekeeper.tkr_raw; 2957 struct tk_data *aux_tkd = aux_get_tk_data(id); 2958 struct timekeeper *aux_tks = &aux_tkd->shadow_timekeeper; 2959 2960 /* Prevent the core timekeeper from changing. */ 2961 guard(raw_spinlock_irq)(&tk_core.lock); 2962 2963 /* 2964 * Setup the auxiliary clock assuming that the raw core timekeeper 2965 * clock frequency conversion is close enough. Userspace has to 2966 * adjust for the deviation via clock_adjtime(2). 2967 */ 2968 guard(raw_spinlock_nested)(&aux_tkd->lock); 2969 2970 /* Remove leftovers of a previous registration */ 2971 memset(aux_tks, 0, sizeof(*aux_tks)); 2972 /* Restore the timekeeper id */ 2973 aux_tks->id = aux_tkd->timekeeper.id; 2974 /* Setup the timekeeper based on the current system clocksource */ 2975 tk_setup_internals(aux_tks, tkr_raw->clock); 2976 2977 /* Mark it valid and set it live */ 2978 aux_tks->clock_valid = true; 2979 timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); 2980 } 2981 2982 static void aux_clock_disable(clockid_t id) 2983 { 2984 struct tk_data *aux_tkd = aux_get_tk_data(id); 2985 2986 guard(raw_spinlock_irq)(&aux_tkd->lock); 2987 aux_tkd->shadow_timekeeper.clock_valid = false; 2988 timekeeping_update_from_shadow(aux_tkd, TK_UPDATE_ALL); 2989 } 2990 2991 static DEFINE_MUTEX(aux_clock_mutex); 2992 2993 static ssize_t aux_clock_enable_store(struct kobject *kobj, struct kobj_attribute *attr, 2994 const char *buf, size_t count) 2995 { 2996 /* Lazy atoi() as name is "0..7" */ 2997 int id = kobj->name[0] & 0x7; 2998 bool enable; 2999 3000 if (!capable(CAP_SYS_TIME)) 3001 return -EPERM; 3002 3003 if (kstrtobool(buf, &enable) < 0) 3004 return -EINVAL; 3005 3006 guard(mutex)(&aux_clock_mutex); 3007 if (enable == test_bit(id, &aux_timekeepers)) 3008 return count; 3009 3010 if (enable) { 3011 aux_clock_enable(CLOCK_AUX + id); 3012 set_bit(id, &aux_timekeepers); 3013 } else { 3014 aux_clock_disable(CLOCK_AUX + id); 3015 clear_bit(id, &aux_timekeepers); 3016 } 3017 return count; 3018 } 3019 3020 static ssize_t aux_clock_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) 3021 { 3022 unsigned long active = READ_ONCE(aux_timekeepers); 3023 /* Lazy atoi() as name is "0..7" */ 3024 int id = kobj->name[0] & 0x7; 3025 3026 return sysfs_emit(buf, "%d\n", test_bit(id, &active)); 3027 } 3028 3029 static struct kobj_attribute aux_clock_enable_attr = __ATTR_RW(aux_clock_enable); 3030 3031 static struct attribute *aux_clock_enable_attrs[] = { 3032 &aux_clock_enable_attr.attr, 3033 NULL 3034 }; 3035 3036 static const struct attribute_group aux_clock_enable_attr_group = { 3037 .attrs = aux_clock_enable_attrs, 3038 }; 3039 3040 static int 
__init tk_aux_sysfs_init(void) 3041 { 3042 struct kobject *auxo, *tko = kobject_create_and_add("time", kernel_kobj); 3043 3044 if (!tko) 3045 return -ENOMEM; 3046 3047 auxo = kobject_create_and_add("aux_clocks", tko); 3048 if (!auxo) { 3049 kobject_put(tko); 3050 return -ENOMEM; 3051 } 3052 3053 for (int i = 0; i <= MAX_AUX_CLOCKS; i++) { 3054 char id[2] = { [0] = '0' + i, }; 3055 struct kobject *clk = kobject_create_and_add(id, auxo); 3056 3057 if (!clk) 3058 return -ENOMEM; 3059 3060 int ret = sysfs_create_group(clk, &aux_clock_enable_attr_group); 3061 3062 if (ret) 3063 return ret; 3064 } 3065 return 0; 3066 } 3067 late_initcall(tk_aux_sysfs_init); 3068 3069 static __init void tk_aux_setup(void) 3070 { 3071 for (int i = TIMEKEEPER_AUX_FIRST; i <= TIMEKEEPER_AUX_LAST; i++) 3072 tkd_basic_setup(&timekeeper_data[i], i, false); 3073 } 3074 #endif /* CONFIG_POSIX_AUX_CLOCKS */ 3075
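
/*
 * Illustrative usage sketch, not part of the kernel sources: enabling
 * auxiliary clock 0 through the sysfs interface created above and reading
 * it via the POSIX clock syscalls. The path follows from the
 * "time"/"aux_clocks"/"0".."7" kobject hierarchy and the aux_clock_enable
 * attribute registered in tk_aux_sysfs_init(); CLOCK_AUX is the clockid
 * range served by the clock_aux k_clock above.
 *
 *	# echo 1 > /sys/kernel/time/aux_clocks/0/aux_clock_enable
 *
 *	struct timespec ts;
 *
 *	if (!clock_gettime(CLOCK_AUX, &ts))
 *		printf("aux0: %lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
 *
 * clock_settime(CLOCK_AUX, ...) and clock_adjtime(CLOCK_AUX, ...) map to
 * aux_clock_set() and aux_clock_adj() in this file.
 */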