1 /*- 2 * SPDX-License-Identifier: Beerware 3 * 4 * ---------------------------------------------------------------------------- 5 * "THE BEER-WARE LICENSE" (Revision 42): 6 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 7 * can do whatever you want with this stuff. If we meet some day, and you think 8 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 9 * ---------------------------------------------------------------------------- 10 * 11 * Copyright (c) 2011, 2015, 2016 The FreeBSD Foundation 12 * All rights reserved. 13 * 14 * Portions of this software were developed by Julien Ridoux at the University 15 * of Melbourne under sponsorship from the FreeBSD Foundation. 16 * 17 * Portions of this software were developed by Konstantin Belousov 18 * under sponsorship from the FreeBSD Foundation. 19 */ 20 21 #include <sys/cdefs.h> 22 __FBSDID("$FreeBSD$"); 23 24 #include "opt_ntp.h" 25 #include "opt_ffclock.h" 26 27 #include <sys/param.h> 28 #include <sys/kernel.h> 29 #include <sys/limits.h> 30 #include <sys/lock.h> 31 #include <sys/mutex.h> 32 #include <sys/proc.h> 33 #include <sys/sbuf.h> 34 #include <sys/sleepqueue.h> 35 #include <sys/sysctl.h> 36 #include <sys/syslog.h> 37 #include <sys/systm.h> 38 #include <sys/timeffc.h> 39 #include <sys/timepps.h> 40 #include <sys/timetc.h> 41 #include <sys/timex.h> 42 #include <sys/vdso.h> 43 44 /* 45 * A large step happens on boot. This constant detects such steps. 46 * It is relatively small so that ntp_update_second gets called enough 47 * in the typical 'missed a couple of seconds' case, but doesn't loop 48 * forever when the time step is large. 49 */ 50 #define LARGE_STEP 200 51 52 /* 53 * Implement a dummy timecounter which we can use until we get a real one 54 * in the air. This allows the console and other early stuff to use 55 * time services. 56 */ 57 58 static u_int 59 dummy_get_timecount(struct timecounter *tc) 60 { 61 static u_int now; 62 63 return (++now); 64 } 65 66 static struct timecounter dummy_timecounter = { 67 dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000 68 }; 69 70 struct timehands { 71 /* These fields must be initialized by the driver. */ 72 struct timecounter *th_counter; 73 int64_t th_adjustment; 74 uint64_t th_scale; 75 u_int th_large_delta; 76 u_int th_offset_count; 77 struct bintime th_offset; 78 struct bintime th_bintime; 79 struct timeval th_microtime; 80 struct timespec th_nanotime; 81 struct bintime th_boottime; 82 /* Fields not to be copied in tc_windup start with th_generation. */ 83 u_int th_generation; 84 struct timehands *th_next; 85 }; 86 87 static struct timehands ths[16] = { 88 [0] = { 89 .th_counter = &dummy_timecounter, 90 .th_scale = (uint64_t)-1 / 1000000, 91 .th_large_delta = 1000000, 92 .th_offset = { .sec = 1 }, 93 .th_generation = 1, 94 }, 95 }; 96 97 static struct timehands *volatile timehands = &ths[0]; 98 struct timecounter *timecounter = &dummy_timecounter; 99 static struct timecounter *timecounters = &dummy_timecounter; 100 101 int tc_min_ticktock_freq = 1; 102 103 volatile time_t time_second = 1; 104 volatile time_t time_uptime = 1; 105 106 static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS); 107 SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, 108 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 109 sysctl_kern_boottime, "S,timeval", 110 "System boottime"); 111 112 SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 113 ""); 114 static SYSCTL_NODE(_kern_timecounter, OID_AUTO, tc, 115 CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 116 ""); 117 118 static int timestepwarnings; 119 SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW, 120 ×tepwarnings, 0, "Log time steps"); 121 122 static int timehands_count = 2; 123 SYSCTL_INT(_kern_timecounter, OID_AUTO, timehands_count, 124 CTLFLAG_RDTUN | CTLFLAG_NOFETCH, 125 &timehands_count, 0, "Count of timehands in rotation"); 126 127 struct bintime bt_timethreshold; 128 struct bintime bt_tickthreshold; 129 sbintime_t sbt_timethreshold; 130 sbintime_t sbt_tickthreshold; 131 struct bintime tc_tick_bt; 132 sbintime_t tc_tick_sbt; 133 int tc_precexp; 134 int tc_timepercentage = TC_DEFAULTPERC; 135 static int sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS); 136 SYSCTL_PROC(_kern_timecounter, OID_AUTO, alloweddeviation, 137 CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 138 sysctl_kern_timecounter_adjprecision, "I", 139 "Allowed time interval deviation in percents"); 140 141 volatile int rtc_generation = 1; 142 143 static int tc_chosen; /* Non-zero if a specific tc was chosen via sysctl. */ 144 145 static void tc_windup(struct bintime *new_boottimebin); 146 static void cpu_tick_calibrate(int); 147 148 void dtrace_getnanotime(struct timespec *tsp); 149 void dtrace_getnanouptime(struct timespec *tsp); 150 151 static int 152 sysctl_kern_boottime(SYSCTL_HANDLER_ARGS) 153 { 154 struct timeval boottime; 155 156 getboottime(&boottime); 157 158 /* i386 is the only arch which uses a 32bits time_t */ 159 #ifdef __amd64__ 160 #ifdef SCTL_MASK32 161 int tv[2]; 162 163 if (req->flags & SCTL_MASK32) { 164 tv[0] = boottime.tv_sec; 165 tv[1] = boottime.tv_usec; 166 return (SYSCTL_OUT(req, tv, sizeof(tv))); 167 } 168 #endif 169 #endif 170 return (SYSCTL_OUT(req, &boottime, sizeof(boottime))); 171 } 172 173 static int 174 sysctl_kern_timecounter_get(SYSCTL_HANDLER_ARGS) 175 { 176 u_int ncount; 177 struct timecounter *tc = arg1; 178 179 ncount = tc->tc_get_timecount(tc); 180 return (sysctl_handle_int(oidp, &ncount, 0, req)); 181 } 182 183 static int 184 sysctl_kern_timecounter_freq(SYSCTL_HANDLER_ARGS) 185 { 186 uint64_t freq; 187 struct timecounter *tc = arg1; 188 189 freq = tc->tc_frequency; 190 return (sysctl_handle_64(oidp, &freq, 0, req)); 191 } 192 193 /* 194 * Return the difference between the timehands' counter value now and what 195 * was when we copied it to the timehands' offset_count. 196 */ 197 static __inline u_int 198 tc_delta(struct timehands *th) 199 { 200 struct timecounter *tc; 201 202 tc = th->th_counter; 203 return ((tc->tc_get_timecount(tc) - th->th_offset_count) & 204 tc->tc_counter_mask); 205 } 206 207 /* 208 * Functions for reading the time. We have to loop until we are sure that 209 * the timehands that we operated on was not updated under our feet. See 210 * the comment in <sys/time.h> for a description of these 12 functions. 211 */ 212 213 static __inline void 214 bintime_off(struct bintime *bt, u_int off) 215 { 216 struct timehands *th; 217 struct bintime *btp; 218 uint64_t scale, x; 219 u_int delta, gen, large_delta; 220 221 do { 222 th = timehands; 223 gen = atomic_load_acq_int(&th->th_generation); 224 btp = (struct bintime *)((vm_offset_t)th + off); 225 *bt = *btp; 226 scale = th->th_scale; 227 delta = tc_delta(th); 228 large_delta = th->th_large_delta; 229 atomic_thread_fence_acq(); 230 } while (gen == 0 || gen != th->th_generation); 231 232 if (__predict_false(delta >= large_delta)) { 233 /* Avoid overflow for scale * delta. */ 234 x = (scale >> 32) * delta; 235 bt->sec += x >> 32; 236 bintime_addx(bt, x << 32); 237 bintime_addx(bt, (scale & 0xffffffff) * delta); 238 } else { 239 bintime_addx(bt, scale * delta); 240 } 241 } 242 #define GETTHBINTIME(dst, member) \ 243 do { \ 244 _Static_assert(_Generic(((struct timehands *)NULL)->member, \ 245 struct bintime: 1, default: 0) == 1, \ 246 "struct timehands member is not of struct bintime type"); \ 247 bintime_off(dst, __offsetof(struct timehands, member)); \ 248 } while (0) 249 250 static __inline void 251 getthmember(void *out, size_t out_size, u_int off) 252 { 253 struct timehands *th; 254 u_int gen; 255 256 do { 257 th = timehands; 258 gen = atomic_load_acq_int(&th->th_generation); 259 memcpy(out, (char *)th + off, out_size); 260 atomic_thread_fence_acq(); 261 } while (gen == 0 || gen != th->th_generation); 262 } 263 #define GETTHMEMBER(dst, member) \ 264 do { \ 265 _Static_assert(_Generic(*dst, \ 266 __typeof(((struct timehands *)NULL)->member): 1, \ 267 default: 0) == 1, \ 268 "*dst and struct timehands member have different types"); \ 269 getthmember(dst, sizeof(*dst), __offsetof(struct timehands, \ 270 member)); \ 271 } while (0) 272 273 #ifdef FFCLOCK 274 void 275 fbclock_binuptime(struct bintime *bt) 276 { 277 278 GETTHBINTIME(bt, th_offset); 279 } 280 281 void 282 fbclock_nanouptime(struct timespec *tsp) 283 { 284 struct bintime bt; 285 286 fbclock_binuptime(&bt); 287 bintime2timespec(&bt, tsp); 288 } 289 290 void 291 fbclock_microuptime(struct timeval *tvp) 292 { 293 struct bintime bt; 294 295 fbclock_binuptime(&bt); 296 bintime2timeval(&bt, tvp); 297 } 298 299 void 300 fbclock_bintime(struct bintime *bt) 301 { 302 303 GETTHBINTIME(bt, th_bintime); 304 } 305 306 void 307 fbclock_nanotime(struct timespec *tsp) 308 { 309 struct bintime bt; 310 311 fbclock_bintime(&bt); 312 bintime2timespec(&bt, tsp); 313 } 314 315 void 316 fbclock_microtime(struct timeval *tvp) 317 { 318 struct bintime bt; 319 320 fbclock_bintime(&bt); 321 bintime2timeval(&bt, tvp); 322 } 323 324 void 325 fbclock_getbinuptime(struct bintime *bt) 326 { 327 328 GETTHMEMBER(bt, th_offset); 329 } 330 331 void 332 fbclock_getnanouptime(struct timespec *tsp) 333 { 334 struct bintime bt; 335 336 GETTHMEMBER(&bt, th_offset); 337 bintime2timespec(&bt, tsp); 338 } 339 340 void 341 fbclock_getmicrouptime(struct timeval *tvp) 342 { 343 struct bintime bt; 344 345 GETTHMEMBER(&bt, th_offset); 346 bintime2timeval(&bt, tvp); 347 } 348 349 void 350 fbclock_getbintime(struct bintime *bt) 351 { 352 353 GETTHMEMBER(bt, th_bintime); 354 } 355 356 void 357 fbclock_getnanotime(struct timespec *tsp) 358 { 359 360 GETTHMEMBER(tsp, th_nanotime); 361 } 362 363 void 364 fbclock_getmicrotime(struct timeval *tvp) 365 { 366 367 GETTHMEMBER(tvp, th_microtime); 368 } 369 #else /* !FFCLOCK */ 370 371 void 372 binuptime(struct bintime *bt) 373 { 374 375 GETTHBINTIME(bt, th_offset); 376 } 377 378 void 379 nanouptime(struct timespec *tsp) 380 { 381 struct bintime bt; 382 383 binuptime(&bt); 384 bintime2timespec(&bt, tsp); 385 } 386 387 void 388 microuptime(struct timeval *tvp) 389 { 390 struct bintime bt; 391 392 binuptime(&bt); 393 bintime2timeval(&bt, tvp); 394 } 395 396 void 397 bintime(struct bintime *bt) 398 { 399 400 GETTHBINTIME(bt, th_bintime); 401 } 402 403 void 404 nanotime(struct timespec *tsp) 405 { 406 struct bintime bt; 407 408 bintime(&bt); 409 bintime2timespec(&bt, tsp); 410 } 411 412 void 413 microtime(struct timeval *tvp) 414 { 415 struct bintime bt; 416 417 bintime(&bt); 418 bintime2timeval(&bt, tvp); 419 } 420 421 void 422 getbinuptime(struct bintime *bt) 423 { 424 425 GETTHMEMBER(bt, th_offset); 426 } 427 428 void 429 getnanouptime(struct timespec *tsp) 430 { 431 struct bintime bt; 432 433 GETTHMEMBER(&bt, th_offset); 434 bintime2timespec(&bt, tsp); 435 } 436 437 void 438 getmicrouptime(struct timeval *tvp) 439 { 440 struct bintime bt; 441 442 GETTHMEMBER(&bt, th_offset); 443 bintime2timeval(&bt, tvp); 444 } 445 446 void 447 getbintime(struct bintime *bt) 448 { 449 450 GETTHMEMBER(bt, th_bintime); 451 } 452 453 void 454 getnanotime(struct timespec *tsp) 455 { 456 457 GETTHMEMBER(tsp, th_nanotime); 458 } 459 460 void 461 getmicrotime(struct timeval *tvp) 462 { 463 464 GETTHMEMBER(tvp, th_microtime); 465 } 466 #endif /* FFCLOCK */ 467 468 void 469 getboottime(struct timeval *boottime) 470 { 471 struct bintime boottimebin; 472 473 getboottimebin(&boottimebin); 474 bintime2timeval(&boottimebin, boottime); 475 } 476 477 void 478 getboottimebin(struct bintime *boottimebin) 479 { 480 481 GETTHMEMBER(boottimebin, th_boottime); 482 } 483 484 #ifdef FFCLOCK 485 /* 486 * Support for feed-forward synchronization algorithms. This is heavily inspired 487 * by the timehands mechanism but kept independent from it. *_windup() functions 488 * have some connection to avoid accessing the timecounter hardware more than 489 * necessary. 490 */ 491 492 /* Feed-forward clock estimates kept updated by the synchronization daemon. */ 493 struct ffclock_estimate ffclock_estimate; 494 struct bintime ffclock_boottime; /* Feed-forward boot time estimate. */ 495 uint32_t ffclock_status; /* Feed-forward clock status. */ 496 int8_t ffclock_updated; /* New estimates are available. */ 497 struct mtx ffclock_mtx; /* Mutex on ffclock_estimate. */ 498 499 struct fftimehands { 500 struct ffclock_estimate cest; 501 struct bintime tick_time; 502 struct bintime tick_time_lerp; 503 ffcounter tick_ffcount; 504 uint64_t period_lerp; 505 volatile uint8_t gen; 506 struct fftimehands *next; 507 }; 508 509 #define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x)) 510 511 static struct fftimehands ffth[10]; 512 static struct fftimehands *volatile fftimehands = ffth; 513 514 static void 515 ffclock_init(void) 516 { 517 struct fftimehands *cur; 518 struct fftimehands *last; 519 520 memset(ffth, 0, sizeof(ffth)); 521 522 last = ffth + NUM_ELEMENTS(ffth) - 1; 523 for (cur = ffth; cur < last; cur++) 524 cur->next = cur + 1; 525 last->next = ffth; 526 527 ffclock_updated = 0; 528 ffclock_status = FFCLOCK_STA_UNSYNC; 529 mtx_init(&ffclock_mtx, "ffclock lock", NULL, MTX_DEF); 530 } 531 532 /* 533 * Reset the feed-forward clock estimates. Called from inittodr() to get things 534 * kick started and uses the timecounter nominal frequency as a first period 535 * estimate. Note: this function may be called several time just after boot. 536 * Note: this is the only function that sets the value of boot time for the 537 * monotonic (i.e. uptime) version of the feed-forward clock. 538 */ 539 void 540 ffclock_reset_clock(struct timespec *ts) 541 { 542 struct timecounter *tc; 543 struct ffclock_estimate cest; 544 545 tc = timehands->th_counter; 546 memset(&cest, 0, sizeof(struct ffclock_estimate)); 547 548 timespec2bintime(ts, &ffclock_boottime); 549 timespec2bintime(ts, &(cest.update_time)); 550 ffclock_read_counter(&cest.update_ffcount); 551 cest.leapsec_next = 0; 552 cest.period = ((1ULL << 63) / tc->tc_frequency) << 1; 553 cest.errb_abs = 0; 554 cest.errb_rate = 0; 555 cest.status = FFCLOCK_STA_UNSYNC; 556 cest.leapsec_total = 0; 557 cest.leapsec = 0; 558 559 mtx_lock(&ffclock_mtx); 560 bcopy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate)); 561 ffclock_updated = INT8_MAX; 562 mtx_unlock(&ffclock_mtx); 563 564 printf("ffclock reset: %s (%llu Hz), time = %ld.%09lu\n", tc->tc_name, 565 (unsigned long long)tc->tc_frequency, (long)ts->tv_sec, 566 (unsigned long)ts->tv_nsec); 567 } 568 569 /* 570 * Sub-routine to convert a time interval measured in RAW counter units to time 571 * in seconds stored in bintime format. 572 * NOTE: bintime_mul requires u_int, but the value of the ffcounter may be 573 * larger than the max value of u_int (on 32 bit architecture). Loop to consume 574 * extra cycles. 575 */ 576 static void 577 ffclock_convert_delta(ffcounter ffdelta, uint64_t period, struct bintime *bt) 578 { 579 struct bintime bt2; 580 ffcounter delta, delta_max; 581 582 delta_max = (1ULL << (8 * sizeof(unsigned int))) - 1; 583 bintime_clear(bt); 584 do { 585 if (ffdelta > delta_max) 586 delta = delta_max; 587 else 588 delta = ffdelta; 589 bt2.sec = 0; 590 bt2.frac = period; 591 bintime_mul(&bt2, (unsigned int)delta); 592 bintime_add(bt, &bt2); 593 ffdelta -= delta; 594 } while (ffdelta > 0); 595 } 596 597 /* 598 * Update the fftimehands. 599 * Push the tick ffcount and time(s) forward based on current clock estimate. 600 * The conversion from ffcounter to bintime relies on the difference clock 601 * principle, whose accuracy relies on computing small time intervals. If a new 602 * clock estimate has been passed by the synchronisation daemon, make it 603 * current, and compute the linear interpolation for monotonic time if needed. 604 */ 605 static void 606 ffclock_windup(unsigned int delta) 607 { 608 struct ffclock_estimate *cest; 609 struct fftimehands *ffth; 610 struct bintime bt, gap_lerp; 611 ffcounter ffdelta; 612 uint64_t frac; 613 unsigned int polling; 614 uint8_t forward_jump, ogen; 615 616 /* 617 * Pick the next timehand, copy current ffclock estimates and move tick 618 * times and counter forward. 619 */ 620 forward_jump = 0; 621 ffth = fftimehands->next; 622 ogen = ffth->gen; 623 ffth->gen = 0; 624 cest = &ffth->cest; 625 bcopy(&fftimehands->cest, cest, sizeof(struct ffclock_estimate)); 626 ffdelta = (ffcounter)delta; 627 ffth->period_lerp = fftimehands->period_lerp; 628 629 ffth->tick_time = fftimehands->tick_time; 630 ffclock_convert_delta(ffdelta, cest->period, &bt); 631 bintime_add(&ffth->tick_time, &bt); 632 633 ffth->tick_time_lerp = fftimehands->tick_time_lerp; 634 ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt); 635 bintime_add(&ffth->tick_time_lerp, &bt); 636 637 ffth->tick_ffcount = fftimehands->tick_ffcount + ffdelta; 638 639 /* 640 * Assess the status of the clock, if the last update is too old, it is 641 * likely the synchronisation daemon is dead and the clock is free 642 * running. 643 */ 644 if (ffclock_updated == 0) { 645 ffdelta = ffth->tick_ffcount - cest->update_ffcount; 646 ffclock_convert_delta(ffdelta, cest->period, &bt); 647 if (bt.sec > 2 * FFCLOCK_SKM_SCALE) 648 ffclock_status |= FFCLOCK_STA_UNSYNC; 649 } 650 651 /* 652 * If available, grab updated clock estimates and make them current. 653 * Recompute time at this tick using the updated estimates. The clock 654 * estimates passed the feed-forward synchronisation daemon may result 655 * in time conversion that is not monotonically increasing (just after 656 * the update). time_lerp is a particular linear interpolation over the 657 * synchronisation algo polling period that ensures monotonicity for the 658 * clock ids requesting it. 659 */ 660 if (ffclock_updated > 0) { 661 bcopy(&ffclock_estimate, cest, sizeof(struct ffclock_estimate)); 662 ffdelta = ffth->tick_ffcount - cest->update_ffcount; 663 ffth->tick_time = cest->update_time; 664 ffclock_convert_delta(ffdelta, cest->period, &bt); 665 bintime_add(&ffth->tick_time, &bt); 666 667 /* ffclock_reset sets ffclock_updated to INT8_MAX */ 668 if (ffclock_updated == INT8_MAX) 669 ffth->tick_time_lerp = ffth->tick_time; 670 671 if (bintime_cmp(&ffth->tick_time, &ffth->tick_time_lerp, >)) 672 forward_jump = 1; 673 else 674 forward_jump = 0; 675 676 bintime_clear(&gap_lerp); 677 if (forward_jump) { 678 gap_lerp = ffth->tick_time; 679 bintime_sub(&gap_lerp, &ffth->tick_time_lerp); 680 } else { 681 gap_lerp = ffth->tick_time_lerp; 682 bintime_sub(&gap_lerp, &ffth->tick_time); 683 } 684 685 /* 686 * The reset from the RTC clock may be far from accurate, and 687 * reducing the gap between real time and interpolated time 688 * could take a very long time if the interpolated clock insists 689 * on strict monotonicity. The clock is reset under very strict 690 * conditions (kernel time is known to be wrong and 691 * synchronization daemon has been restarted recently. 692 * ffclock_boottime absorbs the jump to ensure boot time is 693 * correct and uptime functions stay consistent. 694 */ 695 if (((ffclock_status & FFCLOCK_STA_UNSYNC) == FFCLOCK_STA_UNSYNC) && 696 ((cest->status & FFCLOCK_STA_UNSYNC) == 0) && 697 ((cest->status & FFCLOCK_STA_WARMUP) == FFCLOCK_STA_WARMUP)) { 698 if (forward_jump) 699 bintime_add(&ffclock_boottime, &gap_lerp); 700 else 701 bintime_sub(&ffclock_boottime, &gap_lerp); 702 ffth->tick_time_lerp = ffth->tick_time; 703 bintime_clear(&gap_lerp); 704 } 705 706 ffclock_status = cest->status; 707 ffth->period_lerp = cest->period; 708 709 /* 710 * Compute corrected period used for the linear interpolation of 711 * time. The rate of linear interpolation is capped to 5000PPM 712 * (5ms/s). 713 */ 714 if (bintime_isset(&gap_lerp)) { 715 ffdelta = cest->update_ffcount; 716 ffdelta -= fftimehands->cest.update_ffcount; 717 ffclock_convert_delta(ffdelta, cest->period, &bt); 718 polling = bt.sec; 719 bt.sec = 0; 720 bt.frac = 5000000 * (uint64_t)18446744073LL; 721 bintime_mul(&bt, polling); 722 if (bintime_cmp(&gap_lerp, &bt, >)) 723 gap_lerp = bt; 724 725 /* Approximate 1 sec by 1-(1/2^64) to ease arithmetic */ 726 frac = 0; 727 if (gap_lerp.sec > 0) { 728 frac -= 1; 729 frac /= ffdelta / gap_lerp.sec; 730 } 731 frac += gap_lerp.frac / ffdelta; 732 733 if (forward_jump) 734 ffth->period_lerp += frac; 735 else 736 ffth->period_lerp -= frac; 737 } 738 739 ffclock_updated = 0; 740 } 741 if (++ogen == 0) 742 ogen = 1; 743 ffth->gen = ogen; 744 fftimehands = ffth; 745 } 746 747 /* 748 * Adjust the fftimehands when the timecounter is changed. Stating the obvious, 749 * the old and new hardware counter cannot be read simultaneously. tc_windup() 750 * does read the two counters 'back to back', but a few cycles are effectively 751 * lost, and not accumulated in tick_ffcount. This is a fairly radical 752 * operation for a feed-forward synchronization daemon, and it is its job to not 753 * pushing irrelevant data to the kernel. Because there is no locking here, 754 * simply force to ignore pending or next update to give daemon a chance to 755 * realize the counter has changed. 756 */ 757 static void 758 ffclock_change_tc(struct timehands *th) 759 { 760 struct fftimehands *ffth; 761 struct ffclock_estimate *cest; 762 struct timecounter *tc; 763 uint8_t ogen; 764 765 tc = th->th_counter; 766 ffth = fftimehands->next; 767 ogen = ffth->gen; 768 ffth->gen = 0; 769 770 cest = &ffth->cest; 771 bcopy(&(fftimehands->cest), cest, sizeof(struct ffclock_estimate)); 772 cest->period = ((1ULL << 63) / tc->tc_frequency ) << 1; 773 cest->errb_abs = 0; 774 cest->errb_rate = 0; 775 cest->status |= FFCLOCK_STA_UNSYNC; 776 777 ffth->tick_ffcount = fftimehands->tick_ffcount; 778 ffth->tick_time_lerp = fftimehands->tick_time_lerp; 779 ffth->tick_time = fftimehands->tick_time; 780 ffth->period_lerp = cest->period; 781 782 /* Do not lock but ignore next update from synchronization daemon. */ 783 ffclock_updated--; 784 785 if (++ogen == 0) 786 ogen = 1; 787 ffth->gen = ogen; 788 fftimehands = ffth; 789 } 790 791 /* 792 * Retrieve feed-forward counter and time of last kernel tick. 793 */ 794 void 795 ffclock_last_tick(ffcounter *ffcount, struct bintime *bt, uint32_t flags) 796 { 797 struct fftimehands *ffth; 798 uint8_t gen; 799 800 /* 801 * No locking but check generation has not changed. Also need to make 802 * sure ffdelta is positive, i.e. ffcount > tick_ffcount. 803 */ 804 do { 805 ffth = fftimehands; 806 gen = ffth->gen; 807 if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP) 808 *bt = ffth->tick_time_lerp; 809 else 810 *bt = ffth->tick_time; 811 *ffcount = ffth->tick_ffcount; 812 } while (gen == 0 || gen != ffth->gen); 813 } 814 815 /* 816 * Absolute clock conversion. Low level function to convert ffcounter to 817 * bintime. The ffcounter is converted using the current ffclock period estimate 818 * or the "interpolated period" to ensure monotonicity. 819 * NOTE: this conversion may have been deferred, and the clock updated since the 820 * hardware counter has been read. 821 */ 822 void 823 ffclock_convert_abs(ffcounter ffcount, struct bintime *bt, uint32_t flags) 824 { 825 struct fftimehands *ffth; 826 struct bintime bt2; 827 ffcounter ffdelta; 828 uint8_t gen; 829 830 /* 831 * No locking but check generation has not changed. Also need to make 832 * sure ffdelta is positive, i.e. ffcount > tick_ffcount. 833 */ 834 do { 835 ffth = fftimehands; 836 gen = ffth->gen; 837 if (ffcount > ffth->tick_ffcount) 838 ffdelta = ffcount - ffth->tick_ffcount; 839 else 840 ffdelta = ffth->tick_ffcount - ffcount; 841 842 if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP) { 843 *bt = ffth->tick_time_lerp; 844 ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt2); 845 } else { 846 *bt = ffth->tick_time; 847 ffclock_convert_delta(ffdelta, ffth->cest.period, &bt2); 848 } 849 850 if (ffcount > ffth->tick_ffcount) 851 bintime_add(bt, &bt2); 852 else 853 bintime_sub(bt, &bt2); 854 } while (gen == 0 || gen != ffth->gen); 855 } 856 857 /* 858 * Difference clock conversion. 859 * Low level function to Convert a time interval measured in RAW counter units 860 * into bintime. The difference clock allows measuring small intervals much more 861 * reliably than the absolute clock. 862 */ 863 void 864 ffclock_convert_diff(ffcounter ffdelta, struct bintime *bt) 865 { 866 struct fftimehands *ffth; 867 uint8_t gen; 868 869 /* No locking but check generation has not changed. */ 870 do { 871 ffth = fftimehands; 872 gen = ffth->gen; 873 ffclock_convert_delta(ffdelta, ffth->cest.period, bt); 874 } while (gen == 0 || gen != ffth->gen); 875 } 876 877 /* 878 * Access to current ffcounter value. 879 */ 880 void 881 ffclock_read_counter(ffcounter *ffcount) 882 { 883 struct timehands *th; 884 struct fftimehands *ffth; 885 unsigned int gen, delta; 886 887 /* 888 * ffclock_windup() called from tc_windup(), safe to rely on 889 * th->th_generation only, for correct delta and ffcounter. 890 */ 891 do { 892 th = timehands; 893 gen = atomic_load_acq_int(&th->th_generation); 894 ffth = fftimehands; 895 delta = tc_delta(th); 896 *ffcount = ffth->tick_ffcount; 897 atomic_thread_fence_acq(); 898 } while (gen == 0 || gen != th->th_generation); 899 900 *ffcount += delta; 901 } 902 903 void 904 binuptime(struct bintime *bt) 905 { 906 907 binuptime_fromclock(bt, sysclock_active); 908 } 909 910 void 911 nanouptime(struct timespec *tsp) 912 { 913 914 nanouptime_fromclock(tsp, sysclock_active); 915 } 916 917 void 918 microuptime(struct timeval *tvp) 919 { 920 921 microuptime_fromclock(tvp, sysclock_active); 922 } 923 924 void 925 bintime(struct bintime *bt) 926 { 927 928 bintime_fromclock(bt, sysclock_active); 929 } 930 931 void 932 nanotime(struct timespec *tsp) 933 { 934 935 nanotime_fromclock(tsp, sysclock_active); 936 } 937 938 void 939 microtime(struct timeval *tvp) 940 { 941 942 microtime_fromclock(tvp, sysclock_active); 943 } 944 945 void 946 getbinuptime(struct bintime *bt) 947 { 948 949 getbinuptime_fromclock(bt, sysclock_active); 950 } 951 952 void 953 getnanouptime(struct timespec *tsp) 954 { 955 956 getnanouptime_fromclock(tsp, sysclock_active); 957 } 958 959 void 960 getmicrouptime(struct timeval *tvp) 961 { 962 963 getmicrouptime_fromclock(tvp, sysclock_active); 964 } 965 966 void 967 getbintime(struct bintime *bt) 968 { 969 970 getbintime_fromclock(bt, sysclock_active); 971 } 972 973 void 974 getnanotime(struct timespec *tsp) 975 { 976 977 getnanotime_fromclock(tsp, sysclock_active); 978 } 979 980 void 981 getmicrotime(struct timeval *tvp) 982 { 983 984 getmicrouptime_fromclock(tvp, sysclock_active); 985 } 986 987 #endif /* FFCLOCK */ 988 989 /* 990 * This is a clone of getnanotime and used for walltimestamps. 991 * The dtrace_ prefix prevents fbt from creating probes for 992 * it so walltimestamp can be safely used in all fbt probes. 993 */ 994 void 995 dtrace_getnanotime(struct timespec *tsp) 996 { 997 998 GETTHMEMBER(tsp, th_nanotime); 999 } 1000 1001 /* 1002 * This is a clone of getnanouptime used for time since boot. 1003 * The dtrace_ prefix prevents fbt from creating probes for 1004 * it so an uptime that can be safely used in all fbt probes. 1005 */ 1006 void 1007 dtrace_getnanouptime(struct timespec *tsp) 1008 { 1009 struct bintime bt; 1010 1011 GETTHMEMBER(&bt, th_offset); 1012 bintime2timespec(&bt, tsp); 1013 } 1014 1015 /* 1016 * System clock currently providing time to the system. Modifiable via sysctl 1017 * when the FFCLOCK option is defined. 1018 */ 1019 int sysclock_active = SYSCLOCK_FBCK; 1020 1021 /* Internal NTP status and error estimates. */ 1022 extern int time_status; 1023 extern long time_esterror; 1024 1025 /* 1026 * Take a snapshot of sysclock data which can be used to compare system clocks 1027 * and generate timestamps after the fact. 1028 */ 1029 void 1030 sysclock_getsnapshot(struct sysclock_snap *clock_snap, int fast) 1031 { 1032 struct fbclock_info *fbi; 1033 struct timehands *th; 1034 struct bintime bt; 1035 unsigned int delta, gen; 1036 #ifdef FFCLOCK 1037 ffcounter ffcount; 1038 struct fftimehands *ffth; 1039 struct ffclock_info *ffi; 1040 struct ffclock_estimate cest; 1041 1042 ffi = &clock_snap->ff_info; 1043 #endif 1044 1045 fbi = &clock_snap->fb_info; 1046 delta = 0; 1047 1048 do { 1049 th = timehands; 1050 gen = atomic_load_acq_int(&th->th_generation); 1051 fbi->th_scale = th->th_scale; 1052 fbi->tick_time = th->th_offset; 1053 #ifdef FFCLOCK 1054 ffth = fftimehands; 1055 ffi->tick_time = ffth->tick_time_lerp; 1056 ffi->tick_time_lerp = ffth->tick_time_lerp; 1057 ffi->period = ffth->cest.period; 1058 ffi->period_lerp = ffth->period_lerp; 1059 clock_snap->ffcount = ffth->tick_ffcount; 1060 cest = ffth->cest; 1061 #endif 1062 if (!fast) 1063 delta = tc_delta(th); 1064 atomic_thread_fence_acq(); 1065 } while (gen == 0 || gen != th->th_generation); 1066 1067 clock_snap->delta = delta; 1068 clock_snap->sysclock_active = sysclock_active; 1069 1070 /* Record feedback clock status and error. */ 1071 clock_snap->fb_info.status = time_status; 1072 /* XXX: Very crude estimate of feedback clock error. */ 1073 bt.sec = time_esterror / 1000000; 1074 bt.frac = ((time_esterror - bt.sec) * 1000000) * 1075 (uint64_t)18446744073709ULL; 1076 clock_snap->fb_info.error = bt; 1077 1078 #ifdef FFCLOCK 1079 if (!fast) 1080 clock_snap->ffcount += delta; 1081 1082 /* Record feed-forward clock leap second adjustment. */ 1083 ffi->leapsec_adjustment = cest.leapsec_total; 1084 if (clock_snap->ffcount > cest.leapsec_next) 1085 ffi->leapsec_adjustment -= cest.leapsec; 1086 1087 /* Record feed-forward clock status and error. */ 1088 clock_snap->ff_info.status = cest.status; 1089 ffcount = clock_snap->ffcount - cest.update_ffcount; 1090 ffclock_convert_delta(ffcount, cest.period, &bt); 1091 /* 18446744073709 = int(2^64/1e12), err_bound_rate in [ps/s]. */ 1092 bintime_mul(&bt, cest.errb_rate * (uint64_t)18446744073709ULL); 1093 /* 18446744073 = int(2^64 / 1e9), since err_abs in [ns]. */ 1094 bintime_addx(&bt, cest.errb_abs * (uint64_t)18446744073ULL); 1095 clock_snap->ff_info.error = bt; 1096 #endif 1097 } 1098 1099 /* 1100 * Convert a sysclock snapshot into a struct bintime based on the specified 1101 * clock source and flags. 1102 */ 1103 int 1104 sysclock_snap2bintime(struct sysclock_snap *cs, struct bintime *bt, 1105 int whichclock, uint32_t flags) 1106 { 1107 struct bintime boottimebin; 1108 #ifdef FFCLOCK 1109 struct bintime bt2; 1110 uint64_t period; 1111 #endif 1112 1113 switch (whichclock) { 1114 case SYSCLOCK_FBCK: 1115 *bt = cs->fb_info.tick_time; 1116 1117 /* If snapshot was created with !fast, delta will be >0. */ 1118 if (cs->delta > 0) 1119 bintime_addx(bt, cs->fb_info.th_scale * cs->delta); 1120 1121 if ((flags & FBCLOCK_UPTIME) == 0) { 1122 getboottimebin(&boottimebin); 1123 bintime_add(bt, &boottimebin); 1124 } 1125 break; 1126 #ifdef FFCLOCK 1127 case SYSCLOCK_FFWD: 1128 if (flags & FFCLOCK_LERP) { 1129 *bt = cs->ff_info.tick_time_lerp; 1130 period = cs->ff_info.period_lerp; 1131 } else { 1132 *bt = cs->ff_info.tick_time; 1133 period = cs->ff_info.period; 1134 } 1135 1136 /* If snapshot was created with !fast, delta will be >0. */ 1137 if (cs->delta > 0) { 1138 ffclock_convert_delta(cs->delta, period, &bt2); 1139 bintime_add(bt, &bt2); 1140 } 1141 1142 /* Leap second adjustment. */ 1143 if (flags & FFCLOCK_LEAPSEC) 1144 bt->sec -= cs->ff_info.leapsec_adjustment; 1145 1146 /* Boot time adjustment, for uptime/monotonic clocks. */ 1147 if (flags & FFCLOCK_UPTIME) 1148 bintime_sub(bt, &ffclock_boottime); 1149 break; 1150 #endif 1151 default: 1152 return (EINVAL); 1153 break; 1154 } 1155 1156 return (0); 1157 } 1158 1159 /* 1160 * Initialize a new timecounter and possibly use it. 1161 */ 1162 void 1163 tc_init(struct timecounter *tc) 1164 { 1165 u_int u; 1166 struct sysctl_oid *tc_root; 1167 1168 u = tc->tc_frequency / tc->tc_counter_mask; 1169 /* XXX: We need some margin here, 10% is a guess */ 1170 u *= 11; 1171 u /= 10; 1172 if (u > hz && tc->tc_quality >= 0) { 1173 tc->tc_quality = -2000; 1174 if (bootverbose) { 1175 printf("Timecounter \"%s\" frequency %ju Hz", 1176 tc->tc_name, (uintmax_t)tc->tc_frequency); 1177 printf(" -- Insufficient hz, needs at least %u\n", u); 1178 } 1179 } else if (tc->tc_quality >= 0 || bootverbose) { 1180 printf("Timecounter \"%s\" frequency %ju Hz quality %d\n", 1181 tc->tc_name, (uintmax_t)tc->tc_frequency, 1182 tc->tc_quality); 1183 } 1184 1185 tc->tc_next = timecounters; 1186 timecounters = tc; 1187 /* 1188 * Set up sysctl tree for this counter. 1189 */ 1190 tc_root = SYSCTL_ADD_NODE_WITH_LABEL(NULL, 1191 SYSCTL_STATIC_CHILDREN(_kern_timecounter_tc), OID_AUTO, tc->tc_name, 1192 CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 1193 "timecounter description", "timecounter"); 1194 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, 1195 "mask", CTLFLAG_RD, &(tc->tc_counter_mask), 0, 1196 "mask for implemented bits"); 1197 SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, 1198 "counter", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, tc, 1199 sizeof(*tc), sysctl_kern_timecounter_get, "IU", 1200 "current timecounter value"); 1201 SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, 1202 "frequency", CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, tc, 1203 sizeof(*tc), sysctl_kern_timecounter_freq, "QU", 1204 "timecounter frequency"); 1205 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, 1206 "quality", CTLFLAG_RD, &(tc->tc_quality), 0, 1207 "goodness of time counter"); 1208 /* 1209 * Do not automatically switch if the current tc was specifically 1210 * chosen. Never automatically use a timecounter with negative quality. 1211 * Even though we run on the dummy counter, switching here may be 1212 * worse since this timecounter may not be monotonic. 1213 */ 1214 if (tc_chosen) 1215 return; 1216 if (tc->tc_quality < 0) 1217 return; 1218 if (tc->tc_quality < timecounter->tc_quality) 1219 return; 1220 if (tc->tc_quality == timecounter->tc_quality && 1221 tc->tc_frequency < timecounter->tc_frequency) 1222 return; 1223 (void)tc->tc_get_timecount(tc); 1224 timecounter = tc; 1225 } 1226 1227 /* Report the frequency of the current timecounter. */ 1228 uint64_t 1229 tc_getfrequency(void) 1230 { 1231 1232 return (timehands->th_counter->tc_frequency); 1233 } 1234 1235 static bool 1236 sleeping_on_old_rtc(struct thread *td) 1237 { 1238 1239 /* 1240 * td_rtcgen is modified by curthread when it is running, 1241 * and by other threads in this function. By finding the thread 1242 * on a sleepqueue and holding the lock on the sleepqueue 1243 * chain, we guarantee that the thread is not running and that 1244 * modifying td_rtcgen is safe. Setting td_rtcgen to zero informs 1245 * the thread that it was woken due to a real-time clock adjustment. 1246 * (The declaration of td_rtcgen refers to this comment.) 1247 */ 1248 if (td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation) { 1249 td->td_rtcgen = 0; 1250 return (true); 1251 } 1252 return (false); 1253 } 1254 1255 static struct mtx tc_setclock_mtx; 1256 MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN); 1257 1258 /* 1259 * Step our concept of UTC. This is done by modifying our estimate of 1260 * when we booted. 1261 */ 1262 void 1263 tc_setclock(struct timespec *ts) 1264 { 1265 struct timespec tbef, taft; 1266 struct bintime bt, bt2; 1267 1268 timespec2bintime(ts, &bt); 1269 nanotime(&tbef); 1270 mtx_lock_spin(&tc_setclock_mtx); 1271 cpu_tick_calibrate(1); 1272 binuptime(&bt2); 1273 bintime_sub(&bt, &bt2); 1274 1275 /* XXX fiddle all the little crinkly bits around the fiords... */ 1276 tc_windup(&bt); 1277 mtx_unlock_spin(&tc_setclock_mtx); 1278 1279 /* Avoid rtc_generation == 0, since td_rtcgen == 0 is special. */ 1280 atomic_add_rel_int(&rtc_generation, 2); 1281 sleepq_chains_remove_matching(sleeping_on_old_rtc); 1282 if (timestepwarnings) { 1283 nanotime(&taft); 1284 log(LOG_INFO, 1285 "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n", 1286 (intmax_t)tbef.tv_sec, tbef.tv_nsec, 1287 (intmax_t)taft.tv_sec, taft.tv_nsec, 1288 (intmax_t)ts->tv_sec, ts->tv_nsec); 1289 } 1290 } 1291 1292 /* 1293 * Initialize the next struct timehands in the ring and make 1294 * it the active timehands. Along the way we might switch to a different 1295 * timecounter and/or do seconds processing in NTP. Slightly magic. 1296 */ 1297 static void 1298 tc_windup(struct bintime *new_boottimebin) 1299 { 1300 struct bintime bt; 1301 struct timehands *th, *tho; 1302 uint64_t scale; 1303 u_int delta, ncount, ogen; 1304 int i; 1305 time_t t; 1306 1307 /* 1308 * Make the next timehands a copy of the current one, but do 1309 * not overwrite the generation or next pointer. While we 1310 * update the contents, the generation must be zero. We need 1311 * to ensure that the zero generation is visible before the 1312 * data updates become visible, which requires release fence. 1313 * For similar reasons, re-reading of the generation after the 1314 * data is read should use acquire fence. 1315 */ 1316 tho = timehands; 1317 th = tho->th_next; 1318 ogen = th->th_generation; 1319 th->th_generation = 0; 1320 atomic_thread_fence_rel(); 1321 memcpy(th, tho, offsetof(struct timehands, th_generation)); 1322 if (new_boottimebin != NULL) 1323 th->th_boottime = *new_boottimebin; 1324 1325 /* 1326 * Capture a timecounter delta on the current timecounter and if 1327 * changing timecounters, a counter value from the new timecounter. 1328 * Update the offset fields accordingly. 1329 */ 1330 delta = tc_delta(th); 1331 if (th->th_counter != timecounter) 1332 ncount = timecounter->tc_get_timecount(timecounter); 1333 else 1334 ncount = 0; 1335 #ifdef FFCLOCK 1336 ffclock_windup(delta); 1337 #endif 1338 th->th_offset_count += delta; 1339 th->th_offset_count &= th->th_counter->tc_counter_mask; 1340 while (delta > th->th_counter->tc_frequency) { 1341 /* Eat complete unadjusted seconds. */ 1342 delta -= th->th_counter->tc_frequency; 1343 th->th_offset.sec++; 1344 } 1345 if ((delta > th->th_counter->tc_frequency / 2) && 1346 (th->th_scale * delta < ((uint64_t)1 << 63))) { 1347 /* The product th_scale * delta just barely overflows. */ 1348 th->th_offset.sec++; 1349 } 1350 bintime_addx(&th->th_offset, th->th_scale * delta); 1351 1352 /* 1353 * Hardware latching timecounters may not generate interrupts on 1354 * PPS events, so instead we poll them. There is a finite risk that 1355 * the hardware might capture a count which is later than the one we 1356 * got above, and therefore possibly in the next NTP second which might 1357 * have a different rate than the current NTP second. It doesn't 1358 * matter in practice. 1359 */ 1360 if (tho->th_counter->tc_poll_pps) 1361 tho->th_counter->tc_poll_pps(tho->th_counter); 1362 1363 /* 1364 * Deal with NTP second processing. The for loop normally 1365 * iterates at most once, but in extreme situations it might 1366 * keep NTP sane if timeouts are not run for several seconds. 1367 * At boot, the time step can be large when the TOD hardware 1368 * has been read, so on really large steps, we call 1369 * ntp_update_second only twice. We need to call it twice in 1370 * case we missed a leap second. 1371 */ 1372 bt = th->th_offset; 1373 bintime_add(&bt, &th->th_boottime); 1374 i = bt.sec - tho->th_microtime.tv_sec; 1375 if (i > LARGE_STEP) 1376 i = 2; 1377 for (; i > 0; i--) { 1378 t = bt.sec; 1379 ntp_update_second(&th->th_adjustment, &bt.sec); 1380 if (bt.sec != t) 1381 th->th_boottime.sec += bt.sec - t; 1382 } 1383 /* Update the UTC timestamps used by the get*() functions. */ 1384 th->th_bintime = bt; 1385 bintime2timeval(&bt, &th->th_microtime); 1386 bintime2timespec(&bt, &th->th_nanotime); 1387 1388 /* Now is a good time to change timecounters. */ 1389 if (th->th_counter != timecounter) { 1390 #ifndef __arm__ 1391 if ((timecounter->tc_flags & TC_FLAGS_C2STOP) != 0) 1392 cpu_disable_c2_sleep++; 1393 if ((th->th_counter->tc_flags & TC_FLAGS_C2STOP) != 0) 1394 cpu_disable_c2_sleep--; 1395 #endif 1396 th->th_counter = timecounter; 1397 th->th_offset_count = ncount; 1398 tc_min_ticktock_freq = max(1, timecounter->tc_frequency / 1399 (((uint64_t)timecounter->tc_counter_mask + 1) / 3)); 1400 #ifdef FFCLOCK 1401 ffclock_change_tc(th); 1402 #endif 1403 } 1404 1405 /*- 1406 * Recalculate the scaling factor. We want the number of 1/2^64 1407 * fractions of a second per period of the hardware counter, taking 1408 * into account the th_adjustment factor which the NTP PLL/adjtime(2) 1409 * processing provides us with. 1410 * 1411 * The th_adjustment is nanoseconds per second with 32 bit binary 1412 * fraction and we want 64 bit binary fraction of second: 1413 * 1414 * x = a * 2^32 / 10^9 = a * 4.294967296 1415 * 1416 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int 1417 * we can only multiply by about 850 without overflowing, that 1418 * leaves no suitably precise fractions for multiply before divide. 1419 * 1420 * Divide before multiply with a fraction of 2199/512 results in a 1421 * systematic undercompensation of 10PPM of th_adjustment. On a 1422 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. 1423 * 1424 * We happily sacrifice the lowest of the 64 bits of our result 1425 * to the goddess of code clarity. 1426 * 1427 */ 1428 scale = (uint64_t)1 << 63; 1429 scale += (th->th_adjustment / 1024) * 2199; 1430 scale /= th->th_counter->tc_frequency; 1431 th->th_scale = scale * 2; 1432 th->th_large_delta = MIN(((uint64_t)1 << 63) / scale, UINT_MAX); 1433 1434 /* 1435 * Now that the struct timehands is again consistent, set the new 1436 * generation number, making sure to not make it zero. 1437 */ 1438 if (++ogen == 0) 1439 ogen = 1; 1440 atomic_store_rel_int(&th->th_generation, ogen); 1441 1442 /* Go live with the new struct timehands. */ 1443 #ifdef FFCLOCK 1444 switch (sysclock_active) { 1445 case SYSCLOCK_FBCK: 1446 #endif 1447 time_second = th->th_microtime.tv_sec; 1448 time_uptime = th->th_offset.sec; 1449 #ifdef FFCLOCK 1450 break; 1451 case SYSCLOCK_FFWD: 1452 time_second = fftimehands->tick_time_lerp.sec; 1453 time_uptime = fftimehands->tick_time_lerp.sec - ffclock_boottime.sec; 1454 break; 1455 } 1456 #endif 1457 1458 timehands = th; 1459 timekeep_push_vdso(); 1460 } 1461 1462 /* Report or change the active timecounter hardware. */ 1463 static int 1464 sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS) 1465 { 1466 char newname[32]; 1467 struct timecounter *newtc, *tc; 1468 int error; 1469 1470 tc = timecounter; 1471 strlcpy(newname, tc->tc_name, sizeof(newname)); 1472 1473 error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req); 1474 if (error != 0 || req->newptr == NULL) 1475 return (error); 1476 /* Record that the tc in use now was specifically chosen. */ 1477 tc_chosen = 1; 1478 if (strcmp(newname, tc->tc_name) == 0) 1479 return (0); 1480 for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) { 1481 if (strcmp(newname, newtc->tc_name) != 0) 1482 continue; 1483 1484 /* Warm up new timecounter. */ 1485 (void)newtc->tc_get_timecount(newtc); 1486 1487 timecounter = newtc; 1488 1489 /* 1490 * The vdso timehands update is deferred until the next 1491 * 'tc_windup()'. 1492 * 1493 * This is prudent given that 'timekeep_push_vdso()' does not 1494 * use any locking and that it can be called in hard interrupt 1495 * context via 'tc_windup()'. 1496 */ 1497 return (0); 1498 } 1499 return (EINVAL); 1500 } 1501 1502 SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, 1503 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 1504 sysctl_kern_timecounter_hardware, "A", 1505 "Timecounter hardware selected"); 1506 1507 /* Report the available timecounter hardware. */ 1508 static int 1509 sysctl_kern_timecounter_choice(SYSCTL_HANDLER_ARGS) 1510 { 1511 struct sbuf sb; 1512 struct timecounter *tc; 1513 int error; 1514 1515 sbuf_new_for_sysctl(&sb, NULL, 0, req); 1516 for (tc = timecounters; tc != NULL; tc = tc->tc_next) { 1517 if (tc != timecounters) 1518 sbuf_putc(&sb, ' '); 1519 sbuf_printf(&sb, "%s(%d)", tc->tc_name, tc->tc_quality); 1520 } 1521 error = sbuf_finish(&sb); 1522 sbuf_delete(&sb); 1523 return (error); 1524 } 1525 1526 SYSCTL_PROC(_kern_timecounter, OID_AUTO, choice, 1527 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 1528 sysctl_kern_timecounter_choice, "A", 1529 "Timecounter hardware detected"); 1530 1531 /* 1532 * RFC 2783 PPS-API implementation. 1533 */ 1534 1535 /* 1536 * Return true if the driver is aware of the abi version extensions in the 1537 * pps_state structure, and it supports at least the given abi version number. 1538 */ 1539 static inline int 1540 abi_aware(struct pps_state *pps, int vers) 1541 { 1542 1543 return ((pps->kcmode & KCMODE_ABIFLAG) && pps->driver_abi >= vers); 1544 } 1545 1546 static int 1547 pps_fetch(struct pps_fetch_args *fapi, struct pps_state *pps) 1548 { 1549 int err, timo; 1550 pps_seq_t aseq, cseq; 1551 struct timeval tv; 1552 1553 if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC) 1554 return (EINVAL); 1555 1556 /* 1557 * If no timeout is requested, immediately return whatever values were 1558 * most recently captured. If timeout seconds is -1, that's a request 1559 * to block without a timeout. WITNESS won't let us sleep forever 1560 * without a lock (we really don't need a lock), so just repeatedly 1561 * sleep a long time. 1562 */ 1563 if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) { 1564 if (fapi->timeout.tv_sec == -1) 1565 timo = 0x7fffffff; 1566 else { 1567 tv.tv_sec = fapi->timeout.tv_sec; 1568 tv.tv_usec = fapi->timeout.tv_nsec / 1000; 1569 timo = tvtohz(&tv); 1570 } 1571 aseq = atomic_load_int(&pps->ppsinfo.assert_sequence); 1572 cseq = atomic_load_int(&pps->ppsinfo.clear_sequence); 1573 while (aseq == atomic_load_int(&pps->ppsinfo.assert_sequence) && 1574 cseq == atomic_load_int(&pps->ppsinfo.clear_sequence)) { 1575 if (abi_aware(pps, 1) && pps->driver_mtx != NULL) { 1576 if (pps->flags & PPSFLAG_MTX_SPIN) { 1577 err = msleep_spin(pps, pps->driver_mtx, 1578 "ppsfch", timo); 1579 } else { 1580 err = msleep(pps, pps->driver_mtx, PCATCH, 1581 "ppsfch", timo); 1582 } 1583 } else { 1584 err = tsleep(pps, PCATCH, "ppsfch", timo); 1585 } 1586 if (err == EWOULDBLOCK) { 1587 if (fapi->timeout.tv_sec == -1) { 1588 continue; 1589 } else { 1590 return (ETIMEDOUT); 1591 } 1592 } else if (err != 0) { 1593 return (err); 1594 } 1595 } 1596 } 1597 1598 pps->ppsinfo.current_mode = pps->ppsparam.mode; 1599 fapi->pps_info_buf = pps->ppsinfo; 1600 1601 return (0); 1602 } 1603 1604 int 1605 pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps) 1606 { 1607 pps_params_t *app; 1608 struct pps_fetch_args *fapi; 1609 #ifdef FFCLOCK 1610 struct pps_fetch_ffc_args *fapi_ffc; 1611 #endif 1612 #ifdef PPS_SYNC 1613 struct pps_kcbind_args *kapi; 1614 #endif 1615 1616 KASSERT(pps != NULL, ("NULL pps pointer in pps_ioctl")); 1617 switch (cmd) { 1618 case PPS_IOC_CREATE: 1619 return (0); 1620 case PPS_IOC_DESTROY: 1621 return (0); 1622 case PPS_IOC_SETPARAMS: 1623 app = (pps_params_t *)data; 1624 if (app->mode & ~pps->ppscap) 1625 return (EINVAL); 1626 #ifdef FFCLOCK 1627 /* Ensure only a single clock is selected for ffc timestamp. */ 1628 if ((app->mode & PPS_TSCLK_MASK) == PPS_TSCLK_MASK) 1629 return (EINVAL); 1630 #endif 1631 pps->ppsparam = *app; 1632 return (0); 1633 case PPS_IOC_GETPARAMS: 1634 app = (pps_params_t *)data; 1635 *app = pps->ppsparam; 1636 app->api_version = PPS_API_VERS_1; 1637 return (0); 1638 case PPS_IOC_GETCAP: 1639 *(int*)data = pps->ppscap; 1640 return (0); 1641 case PPS_IOC_FETCH: 1642 fapi = (struct pps_fetch_args *)data; 1643 return (pps_fetch(fapi, pps)); 1644 #ifdef FFCLOCK 1645 case PPS_IOC_FETCH_FFCOUNTER: 1646 fapi_ffc = (struct pps_fetch_ffc_args *)data; 1647 if (fapi_ffc->tsformat && fapi_ffc->tsformat != 1648 PPS_TSFMT_TSPEC) 1649 return (EINVAL); 1650 if (fapi_ffc->timeout.tv_sec || fapi_ffc->timeout.tv_nsec) 1651 return (EOPNOTSUPP); 1652 pps->ppsinfo_ffc.current_mode = pps->ppsparam.mode; 1653 fapi_ffc->pps_info_buf_ffc = pps->ppsinfo_ffc; 1654 /* Overwrite timestamps if feedback clock selected. */ 1655 switch (pps->ppsparam.mode & PPS_TSCLK_MASK) { 1656 case PPS_TSCLK_FBCK: 1657 fapi_ffc->pps_info_buf_ffc.assert_timestamp = 1658 pps->ppsinfo.assert_timestamp; 1659 fapi_ffc->pps_info_buf_ffc.clear_timestamp = 1660 pps->ppsinfo.clear_timestamp; 1661 break; 1662 case PPS_TSCLK_FFWD: 1663 break; 1664 default: 1665 break; 1666 } 1667 return (0); 1668 #endif /* FFCLOCK */ 1669 case PPS_IOC_KCBIND: 1670 #ifdef PPS_SYNC 1671 kapi = (struct pps_kcbind_args *)data; 1672 /* XXX Only root should be able to do this */ 1673 if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC) 1674 return (EINVAL); 1675 if (kapi->kernel_consumer != PPS_KC_HARDPPS) 1676 return (EINVAL); 1677 if (kapi->edge & ~pps->ppscap) 1678 return (EINVAL); 1679 pps->kcmode = (kapi->edge & KCMODE_EDGEMASK) | 1680 (pps->kcmode & KCMODE_ABIFLAG); 1681 return (0); 1682 #else 1683 return (EOPNOTSUPP); 1684 #endif 1685 default: 1686 return (ENOIOCTL); 1687 } 1688 } 1689 1690 void 1691 pps_init(struct pps_state *pps) 1692 { 1693 pps->ppscap |= PPS_TSFMT_TSPEC | PPS_CANWAIT; 1694 if (pps->ppscap & PPS_CAPTUREASSERT) 1695 pps->ppscap |= PPS_OFFSETASSERT; 1696 if (pps->ppscap & PPS_CAPTURECLEAR) 1697 pps->ppscap |= PPS_OFFSETCLEAR; 1698 #ifdef FFCLOCK 1699 pps->ppscap |= PPS_TSCLK_MASK; 1700 #endif 1701 pps->kcmode &= ~KCMODE_ABIFLAG; 1702 } 1703 1704 void 1705 pps_init_abi(struct pps_state *pps) 1706 { 1707 1708 pps_init(pps); 1709 if (pps->driver_abi > 0) { 1710 pps->kcmode |= KCMODE_ABIFLAG; 1711 pps->kernel_abi = PPS_ABI_VERSION; 1712 } 1713 } 1714 1715 void 1716 pps_capture(struct pps_state *pps) 1717 { 1718 struct timehands *th; 1719 1720 KASSERT(pps != NULL, ("NULL pps pointer in pps_capture")); 1721 th = timehands; 1722 pps->capgen = atomic_load_acq_int(&th->th_generation); 1723 pps->capth = th; 1724 #ifdef FFCLOCK 1725 pps->capffth = fftimehands; 1726 #endif 1727 pps->capcount = th->th_counter->tc_get_timecount(th->th_counter); 1728 atomic_thread_fence_acq(); 1729 if (pps->capgen != th->th_generation) 1730 pps->capgen = 0; 1731 } 1732 1733 void 1734 pps_event(struct pps_state *pps, int event) 1735 { 1736 struct bintime bt; 1737 struct timespec ts, *tsp, *osp; 1738 u_int tcount, *pcount; 1739 int foff; 1740 pps_seq_t *pseq; 1741 #ifdef FFCLOCK 1742 struct timespec *tsp_ffc; 1743 pps_seq_t *pseq_ffc; 1744 ffcounter *ffcount; 1745 #endif 1746 #ifdef PPS_SYNC 1747 int fhard; 1748 #endif 1749 1750 KASSERT(pps != NULL, ("NULL pps pointer in pps_event")); 1751 /* Nothing to do if not currently set to capture this event type. */ 1752 if ((event & pps->ppsparam.mode) == 0) 1753 return; 1754 /* If the timecounter was wound up underneath us, bail out. */ 1755 if (pps->capgen == 0 || pps->capgen != 1756 atomic_load_acq_int(&pps->capth->th_generation)) 1757 return; 1758 1759 /* Things would be easier with arrays. */ 1760 if (event == PPS_CAPTUREASSERT) { 1761 tsp = &pps->ppsinfo.assert_timestamp; 1762 osp = &pps->ppsparam.assert_offset; 1763 foff = pps->ppsparam.mode & PPS_OFFSETASSERT; 1764 #ifdef PPS_SYNC 1765 fhard = pps->kcmode & PPS_CAPTUREASSERT; 1766 #endif 1767 pcount = &pps->ppscount[0]; 1768 pseq = &pps->ppsinfo.assert_sequence; 1769 #ifdef FFCLOCK 1770 ffcount = &pps->ppsinfo_ffc.assert_ffcount; 1771 tsp_ffc = &pps->ppsinfo_ffc.assert_timestamp; 1772 pseq_ffc = &pps->ppsinfo_ffc.assert_sequence; 1773 #endif 1774 } else { 1775 tsp = &pps->ppsinfo.clear_timestamp; 1776 osp = &pps->ppsparam.clear_offset; 1777 foff = pps->ppsparam.mode & PPS_OFFSETCLEAR; 1778 #ifdef PPS_SYNC 1779 fhard = pps->kcmode & PPS_CAPTURECLEAR; 1780 #endif 1781 pcount = &pps->ppscount[1]; 1782 pseq = &pps->ppsinfo.clear_sequence; 1783 #ifdef FFCLOCK 1784 ffcount = &pps->ppsinfo_ffc.clear_ffcount; 1785 tsp_ffc = &pps->ppsinfo_ffc.clear_timestamp; 1786 pseq_ffc = &pps->ppsinfo_ffc.clear_sequence; 1787 #endif 1788 } 1789 1790 /* 1791 * If the timecounter changed, we cannot compare the count values, so 1792 * we have to drop the rest of the PPS-stuff until the next event. 1793 */ 1794 if (pps->ppstc != pps->capth->th_counter) { 1795 pps->ppstc = pps->capth->th_counter; 1796 *pcount = pps->capcount; 1797 pps->ppscount[2] = pps->capcount; 1798 return; 1799 } 1800 1801 /* Convert the count to a timespec. */ 1802 tcount = pps->capcount - pps->capth->th_offset_count; 1803 tcount &= pps->capth->th_counter->tc_counter_mask; 1804 bt = pps->capth->th_bintime; 1805 bintime_addx(&bt, pps->capth->th_scale * tcount); 1806 bintime2timespec(&bt, &ts); 1807 1808 /* If the timecounter was wound up underneath us, bail out. */ 1809 atomic_thread_fence_acq(); 1810 if (pps->capgen != pps->capth->th_generation) 1811 return; 1812 1813 *pcount = pps->capcount; 1814 (*pseq)++; 1815 *tsp = ts; 1816 1817 if (foff) { 1818 timespecadd(tsp, osp, tsp); 1819 if (tsp->tv_nsec < 0) { 1820 tsp->tv_nsec += 1000000000; 1821 tsp->tv_sec -= 1; 1822 } 1823 } 1824 1825 #ifdef FFCLOCK 1826 *ffcount = pps->capffth->tick_ffcount + tcount; 1827 bt = pps->capffth->tick_time; 1828 ffclock_convert_delta(tcount, pps->capffth->cest.period, &bt); 1829 bintime_add(&bt, &pps->capffth->tick_time); 1830 bintime2timespec(&bt, &ts); 1831 (*pseq_ffc)++; 1832 *tsp_ffc = ts; 1833 #endif 1834 1835 #ifdef PPS_SYNC 1836 if (fhard) { 1837 uint64_t scale; 1838 1839 /* 1840 * Feed the NTP PLL/FLL. 1841 * The FLL wants to know how many (hardware) nanoseconds 1842 * elapsed since the previous event. 1843 */ 1844 tcount = pps->capcount - pps->ppscount[2]; 1845 pps->ppscount[2] = pps->capcount; 1846 tcount &= pps->capth->th_counter->tc_counter_mask; 1847 scale = (uint64_t)1 << 63; 1848 scale /= pps->capth->th_counter->tc_frequency; 1849 scale *= 2; 1850 bt.sec = 0; 1851 bt.frac = 0; 1852 bintime_addx(&bt, scale * tcount); 1853 bintime2timespec(&bt, &ts); 1854 hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec); 1855 } 1856 #endif 1857 1858 /* Wakeup anyone sleeping in pps_fetch(). */ 1859 wakeup(pps); 1860 } 1861 1862 /* 1863 * Timecounters need to be updated every so often to prevent the hardware 1864 * counter from overflowing. Updating also recalculates the cached values 1865 * used by the get*() family of functions, so their precision depends on 1866 * the update frequency. 1867 */ 1868 1869 static int tc_tick; 1870 SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tc_tick, 0, 1871 "Approximate number of hardclock ticks in a millisecond"); 1872 1873 void 1874 tc_ticktock(int cnt) 1875 { 1876 static int count; 1877 1878 if (mtx_trylock_spin(&tc_setclock_mtx)) { 1879 count += cnt; 1880 if (count >= tc_tick) { 1881 count = 0; 1882 tc_windup(NULL); 1883 } 1884 mtx_unlock_spin(&tc_setclock_mtx); 1885 } 1886 } 1887 1888 static void __inline 1889 tc_adjprecision(void) 1890 { 1891 int t; 1892 1893 if (tc_timepercentage > 0) { 1894 t = (99 + tc_timepercentage) / tc_timepercentage; 1895 tc_precexp = fls(t + (t >> 1)) - 1; 1896 FREQ2BT(hz / tc_tick, &bt_timethreshold); 1897 FREQ2BT(hz, &bt_tickthreshold); 1898 bintime_shift(&bt_timethreshold, tc_precexp); 1899 bintime_shift(&bt_tickthreshold, tc_precexp); 1900 } else { 1901 tc_precexp = 31; 1902 bt_timethreshold.sec = INT_MAX; 1903 bt_timethreshold.frac = ~(uint64_t)0; 1904 bt_tickthreshold = bt_timethreshold; 1905 } 1906 sbt_timethreshold = bttosbt(bt_timethreshold); 1907 sbt_tickthreshold = bttosbt(bt_tickthreshold); 1908 } 1909 1910 static int 1911 sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS) 1912 { 1913 int error, val; 1914 1915 val = tc_timepercentage; 1916 error = sysctl_handle_int(oidp, &val, 0, req); 1917 if (error != 0 || req->newptr == NULL) 1918 return (error); 1919 tc_timepercentage = val; 1920 if (cold) 1921 goto done; 1922 tc_adjprecision(); 1923 done: 1924 return (0); 1925 } 1926 1927 /* Set up the requested number of timehands. */ 1928 static void 1929 inittimehands(void *dummy) 1930 { 1931 struct timehands *thp; 1932 int i; 1933 1934 TUNABLE_INT_FETCH("kern.timecounter.timehands_count", 1935 &timehands_count); 1936 if (timehands_count < 1) 1937 timehands_count = 1; 1938 if (timehands_count > nitems(ths)) 1939 timehands_count = nitems(ths); 1940 for (i = 1, thp = &ths[0]; i < timehands_count; thp = &ths[i++]) 1941 thp->th_next = &ths[i]; 1942 thp->th_next = &ths[0]; 1943 } 1944 SYSINIT(timehands, SI_SUB_TUNABLES, SI_ORDER_ANY, inittimehands, NULL); 1945 1946 static void 1947 inittimecounter(void *dummy) 1948 { 1949 u_int p; 1950 int tick_rate; 1951 1952 /* 1953 * Set the initial timeout to 1954 * max(1, <approx. number of hardclock ticks in a millisecond>). 1955 * People should probably not use the sysctl to set the timeout 1956 * to smaller than its initial value, since that value is the 1957 * smallest reasonable one. If they want better timestamps they 1958 * should use the non-"get"* functions. 1959 */ 1960 if (hz > 1000) 1961 tc_tick = (hz + 500) / 1000; 1962 else 1963 tc_tick = 1; 1964 tc_adjprecision(); 1965 FREQ2BT(hz, &tick_bt); 1966 tick_sbt = bttosbt(tick_bt); 1967 tick_rate = hz / tc_tick; 1968 FREQ2BT(tick_rate, &tc_tick_bt); 1969 tc_tick_sbt = bttosbt(tc_tick_bt); 1970 p = (tc_tick * 1000000) / hz; 1971 printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); 1972 1973 #ifdef FFCLOCK 1974 ffclock_init(); 1975 #endif 1976 1977 /* warm up new timecounter (again) and get rolling. */ 1978 (void)timecounter->tc_get_timecount(timecounter); 1979 mtx_lock_spin(&tc_setclock_mtx); 1980 tc_windup(NULL); 1981 mtx_unlock_spin(&tc_setclock_mtx); 1982 } 1983 1984 SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL); 1985 1986 /* Cpu tick handling -------------------------------------------------*/ 1987 1988 static int cpu_tick_variable; 1989 static uint64_t cpu_tick_frequency; 1990 1991 DPCPU_DEFINE_STATIC(uint64_t, tc_cpu_ticks_base); 1992 DPCPU_DEFINE_STATIC(unsigned, tc_cpu_ticks_last); 1993 1994 static uint64_t 1995 tc_cpu_ticks(void) 1996 { 1997 struct timecounter *tc; 1998 uint64_t res, *base; 1999 unsigned u, *last; 2000 2001 critical_enter(); 2002 base = DPCPU_PTR(tc_cpu_ticks_base); 2003 last = DPCPU_PTR(tc_cpu_ticks_last); 2004 tc = timehands->th_counter; 2005 u = tc->tc_get_timecount(tc) & tc->tc_counter_mask; 2006 if (u < *last) 2007 *base += (uint64_t)tc->tc_counter_mask + 1; 2008 *last = u; 2009 res = u + *base; 2010 critical_exit(); 2011 return (res); 2012 } 2013 2014 void 2015 cpu_tick_calibration(void) 2016 { 2017 static time_t last_calib; 2018 2019 if (time_uptime != last_calib && !(time_uptime & 0xf)) { 2020 cpu_tick_calibrate(0); 2021 last_calib = time_uptime; 2022 } 2023 } 2024 2025 /* 2026 * This function gets called every 16 seconds on only one designated 2027 * CPU in the system from hardclock() via cpu_tick_calibration()(). 2028 * 2029 * Whenever the real time clock is stepped we get called with reset=1 2030 * to make sure we handle suspend/resume and similar events correctly. 2031 */ 2032 2033 static void 2034 cpu_tick_calibrate(int reset) 2035 { 2036 static uint64_t c_last; 2037 uint64_t c_this, c_delta; 2038 static struct bintime t_last; 2039 struct bintime t_this, t_delta; 2040 uint32_t divi; 2041 2042 if (reset) { 2043 /* The clock was stepped, abort & reset */ 2044 t_last.sec = 0; 2045 return; 2046 } 2047 2048 /* we don't calibrate fixed rate cputicks */ 2049 if (!cpu_tick_variable) 2050 return; 2051 2052 getbinuptime(&t_this); 2053 c_this = cpu_ticks(); 2054 if (t_last.sec != 0) { 2055 c_delta = c_this - c_last; 2056 t_delta = t_this; 2057 bintime_sub(&t_delta, &t_last); 2058 /* 2059 * Headroom: 2060 * 2^(64-20) / 16[s] = 2061 * 2^(44) / 16[s] = 2062 * 17.592.186.044.416 / 16 = 2063 * 1.099.511.627.776 [Hz] 2064 */ 2065 divi = t_delta.sec << 20; 2066 divi |= t_delta.frac >> (64 - 20); 2067 c_delta <<= 20; 2068 c_delta /= divi; 2069 if (c_delta > cpu_tick_frequency) { 2070 if (0 && bootverbose) 2071 printf("cpu_tick increased to %ju Hz\n", 2072 c_delta); 2073 cpu_tick_frequency = c_delta; 2074 } 2075 } 2076 c_last = c_this; 2077 t_last = t_this; 2078 } 2079 2080 void 2081 set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var) 2082 { 2083 2084 if (func == NULL) { 2085 cpu_ticks = tc_cpu_ticks; 2086 } else { 2087 cpu_tick_frequency = freq; 2088 cpu_tick_variable = var; 2089 cpu_ticks = func; 2090 } 2091 } 2092 2093 uint64_t 2094 cpu_tickrate(void) 2095 { 2096 2097 if (cpu_ticks == tc_cpu_ticks) 2098 return (tc_getfrequency()); 2099 return (cpu_tick_frequency); 2100 } 2101 2102 /* 2103 * We need to be slightly careful converting cputicks to microseconds. 2104 * There is plenty of margin in 64 bits of microseconds (half a million 2105 * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply 2106 * before divide conversion (to retain precision) we find that the 2107 * margin shrinks to 1.5 hours (one millionth of 146y). 2108 * With a three prong approach we never lose significant bits, no 2109 * matter what the cputick rate and length of timeinterval is. 2110 */ 2111 2112 uint64_t 2113 cputick2usec(uint64_t tick) 2114 { 2115 2116 if (tick > 18446744073709551LL) /* floor(2^64 / 1000) */ 2117 return (tick / (cpu_tickrate() / 1000000LL)); 2118 else if (tick > 18446744073709LL) /* floor(2^64 / 1000000) */ 2119 return ((tick * 1000LL) / (cpu_tickrate() / 1000LL)); 2120 else 2121 return ((tick * 1000000LL) / cpu_tickrate()); 2122 } 2123 2124 cpu_tick_f *cpu_ticks = tc_cpu_ticks; 2125 2126 static int vdso_th_enable = 1; 2127 static int 2128 sysctl_fast_gettime(SYSCTL_HANDLER_ARGS) 2129 { 2130 int old_vdso_th_enable, error; 2131 2132 old_vdso_th_enable = vdso_th_enable; 2133 error = sysctl_handle_int(oidp, &old_vdso_th_enable, 0, req); 2134 if (error != 0) 2135 return (error); 2136 vdso_th_enable = old_vdso_th_enable; 2137 return (0); 2138 } 2139 SYSCTL_PROC(_kern_timecounter, OID_AUTO, fast_gettime, 2140 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 2141 NULL, 0, sysctl_fast_gettime, "I", "Enable fast time of day"); 2142 2143 uint32_t 2144 tc_fill_vdso_timehands(struct vdso_timehands *vdso_th) 2145 { 2146 struct timehands *th; 2147 uint32_t enabled; 2148 2149 th = timehands; 2150 vdso_th->th_scale = th->th_scale; 2151 vdso_th->th_offset_count = th->th_offset_count; 2152 vdso_th->th_counter_mask = th->th_counter->tc_counter_mask; 2153 vdso_th->th_offset = th->th_offset; 2154 vdso_th->th_boottime = th->th_boottime; 2155 if (th->th_counter->tc_fill_vdso_timehands != NULL) { 2156 enabled = th->th_counter->tc_fill_vdso_timehands(vdso_th, 2157 th->th_counter); 2158 } else 2159 enabled = 0; 2160 if (!vdso_th_enable) 2161 enabled = 0; 2162 return (enabled); 2163 } 2164 2165 #ifdef COMPAT_FREEBSD32 2166 uint32_t 2167 tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32) 2168 { 2169 struct timehands *th; 2170 uint32_t enabled; 2171 2172 th = timehands; 2173 *(uint64_t *)&vdso_th32->th_scale[0] = th->th_scale; 2174 vdso_th32->th_offset_count = th->th_offset_count; 2175 vdso_th32->th_counter_mask = th->th_counter->tc_counter_mask; 2176 vdso_th32->th_offset.sec = th->th_offset.sec; 2177 *(uint64_t *)&vdso_th32->th_offset.frac[0] = th->th_offset.frac; 2178 vdso_th32->th_boottime.sec = th->th_boottime.sec; 2179 *(uint64_t *)&vdso_th32->th_boottime.frac[0] = th->th_boottime.frac; 2180 if (th->th_counter->tc_fill_vdso_timehands32 != NULL) { 2181 enabled = th->th_counter->tc_fill_vdso_timehands32(vdso_th32, 2182 th->th_counter); 2183 } else 2184 enabled = 0; 2185 if (!vdso_th_enable) 2186 enabled = 0; 2187 return (enabled); 2188 } 2189 #endif 2190 2191 #include "opt_ddb.h" 2192 #ifdef DDB 2193 #include <ddb/ddb.h> 2194 2195 DB_SHOW_COMMAND(timecounter, db_show_timecounter) 2196 { 2197 struct timehands *th; 2198 struct timecounter *tc; 2199 u_int val1, val2; 2200 2201 th = timehands; 2202 tc = th->th_counter; 2203 val1 = tc->tc_get_timecount(tc); 2204 __compiler_membar(); 2205 val2 = tc->tc_get_timecount(tc); 2206 2207 db_printf("timecounter %p %s\n", tc, tc->tc_name); 2208 db_printf(" mask %#x freq %ju qual %d flags %#x priv %p\n", 2209 tc->tc_counter_mask, (uintmax_t)tc->tc_frequency, tc->tc_quality, 2210 tc->tc_flags, tc->tc_priv); 2211 db_printf(" val %#x %#x\n", val1, val2); 2212 db_printf("timehands adj %#jx scale %#jx ldelta %d off_cnt %d gen %d\n", 2213 (uintmax_t)th->th_adjustment, (uintmax_t)th->th_scale, 2214 th->th_large_delta, th->th_offset_count, th->th_generation); 2215 db_printf(" offset %jd %jd boottime %jd %jd\n", 2216 (intmax_t)th->th_offset.sec, (uintmax_t)th->th_offset.frac, 2217 (intmax_t)th->th_boottime.sec, (uintmax_t)th->th_boottime.frac); 2218 } 2219 #endif 2220