/*-
 * SPDX-License-Identifier: Beerware
 *
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 *
 * Copyright (c) 2011, 2015, 2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Julien Ridoux at the University
 * of Melbourne under sponsorship from the FreeBSD Foundation.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ntp.h"
#include "opt_ffclock.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sleepqueue.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/timeffc.h>
#include <sys/timepps.h>
#include <sys/timetc.h>
#include <sys/timex.h>
#include <sys/vdso.h>

/*
 * A large step happens on boot. This constant detects such steps.
 * It is relatively small so that ntp_update_second gets called enough
 * in the typical 'missed a couple of seconds' case, but doesn't loop
 * forever when the time step is large.
 */
#define	LARGE_STEP	200

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air. This allows the console and other early stuff to use
 * time services.
 */

static u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	return (++now);
}

static struct timecounter dummy_timecounter = {
	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
};

struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;
	int64_t			th_adjustment;
	uint64_t		th_scale;
	u_int			th_large_delta;
	u_int			th_offset_count;
	struct bintime		th_offset;
	struct bintime		th_bintime;
	struct timeval		th_microtime;
	struct timespec		th_nanotime;
	struct bintime		th_boottime;
	/* Fields not to be copied in tc_windup start with th_generation. */
	u_int			th_generation;
	struct timehands	*th_next;
};

static struct timehands ths[16] = {
	[0] = {
		.th_counter = &dummy_timecounter,
		.th_scale = (uint64_t)-1 / 1000000,
		.th_large_delta = 1000000,
		.th_offset = { .sec = 1 },
		.th_generation = 1,
	},
};

static struct timehands *volatile timehands = &ths[0];
struct timecounter *timecounter = &dummy_timecounter;
static struct timecounter *timecounters = &dummy_timecounter;

int tc_min_ticktock_freq = 1;

volatile time_t time_second = 1;
volatile time_t time_uptime = 1;

static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_kern_boottime, "S,timeval",
    "System boottime");

SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "");
static SYSCTL_NODE(_kern_timecounter, OID_AUTO, tc,
    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "");

static int timestepwarnings;
SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
    &timestepwarnings, 0, "Log time steps");

static int timehands_count = 2;
SYSCTL_INT(_kern_timecounter, OID_AUTO, timehands_count,
    CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &timehands_count, 0, "Count of timehands in rotation");

struct bintime bt_timethreshold;
struct bintime bt_tickthreshold;
sbintime_t sbt_timethreshold;
sbintime_t sbt_tickthreshold;
struct bintime tc_tick_bt;
sbintime_t tc_tick_sbt;
int tc_precexp;
int tc_timepercentage = TC_DEFAULTPERC;
static int sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_kern_timecounter, OID_AUTO, alloweddeviation,
    CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
    sysctl_kern_timecounter_adjprecision, "I",
    "Allowed time interval deviation in percents");

volatile int rtc_generation = 1;

static int tc_chosen;	/* Non-zero if a specific tc was chosen via sysctl. */
static char tc_from_tunable[16];

static void tc_windup(struct bintime *new_boottimebin);
static void cpu_tick_calibrate(int);

void dtrace_getnanotime(struct timespec *tsp);
void dtrace_getnanouptime(struct timespec *tsp);

static int
sysctl_kern_boottime(SYSCTL_HANDLER_ARGS)
{
	struct timeval boottime;

	getboottime(&boottime);

	/* i386 is the only arch which uses a 32-bit time_t. */
#ifdef __amd64__
#ifdef SCTL_MASK32
	int tv[2];

	if (req->flags & SCTL_MASK32) {
		tv[0] = boottime.tv_sec;
		tv[1] = boottime.tv_usec;
		return (SYSCTL_OUT(req, tv, sizeof(tv)));
	}
#endif
#endif
	return (SYSCTL_OUT(req, &boottime, sizeof(boottime)));
}

static int
sysctl_kern_timecounter_get(SYSCTL_HANDLER_ARGS)
{
	u_int ncount;
	struct timecounter *tc = arg1;

	ncount = tc->tc_get_timecount(tc);
	return (sysctl_handle_int(oidp, &ncount, 0, req));
}

static int
sysctl_kern_timecounter_freq(SYSCTL_HANDLER_ARGS)
{
	uint64_t freq;
	struct timecounter *tc = arg1;

	freq = tc->tc_frequency;
	return (sysctl_handle_64(oidp, &freq, 0, req));
}

/*
 * Return the difference between the timehands' counter value now and what
 * was when we copied it to the timehands' offset_count.
 */
static __inline u_int
tc_delta(struct timehands *th)
{
	struct timecounter *tc;

	tc = th->th_counter;
	return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
	    tc->tc_counter_mask);
}
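
/*
 * Editor's note, a worked example of the masked subtraction above
 * (hypothetical 8-bit hardware): with tc_counter_mask == 0xff, a latched
 * th_offset_count of 250 and a current reading of 5,
 *
 *	(5 - 250) & 0xff == 11
 *
 * so unsigned wraparound still yields the correct number of elapsed ticks,
 * provided less than one full counter period passed since tc_windup().
 */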

/*
 * Functions for reading the time. We have to loop until we are sure that
 * the timehands that we operated on was not updated under our feet. See
 * the comment in <sys/time.h> for a description of these 12 functions.
 */

static __inline void
bintime_off(struct bintime *bt, u_int off)
{
	struct timehands *th;
	struct bintime *btp;
	uint64_t scale, x;
	u_int delta, gen, large_delta;

	do {
		th = timehands;
		gen = atomic_load_acq_int(&th->th_generation);
		btp = (struct bintime *)((vm_offset_t)th + off);
		*bt = *btp;
		scale = th->th_scale;
		delta = tc_delta(th);
		large_delta = th->th_large_delta;
		atomic_thread_fence_acq();
	} while (gen == 0 || gen != th->th_generation);

	if (__predict_false(delta >= large_delta)) {
		/* Avoid overflow for scale * delta. */
		x = (scale >> 32) * delta;
		bt->sec += x >> 32;
		bintime_addx(bt, x << 32);
		bintime_addx(bt, (scale & 0xffffffff) * delta);
	} else {
		bintime_addx(bt, scale * delta);
	}
}
#define	GETTHBINTIME(dst, member)					\
do {									\
	_Static_assert(_Generic(((struct timehands *)NULL)->member,	\
	    struct bintime: 1, default: 0) == 1,			\
	    "struct timehands member is not of struct bintime type");	\
	bintime_off(dst, __offsetof(struct timehands, member));		\
} while (0)

static __inline void
getthmember(void *out, size_t out_size, u_int off)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = atomic_load_acq_int(&th->th_generation);
		memcpy(out, (char *)th + off, out_size);
		atomic_thread_fence_acq();
	} while (gen == 0 || gen != th->th_generation);
}
#define	GETTHMEMBER(dst, member)					\
do {									\
	_Static_assert(_Generic(*dst,					\
	    __typeof(((struct timehands *)NULL)->member): 1,		\
	    default: 0) == 1,						\
	    "*dst and struct timehands member have different types");	\
	getthmember(dst, sizeof(*dst), __offsetof(struct timehands,	\
	    member));							\
} while (0)
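
/*
 * Editor's note: both helpers above follow the same lockless read protocol.
 * A sketch of the pattern (illustration only, not an additional API):
 *
 *	do {
 *		th = timehands;
 *		gen = atomic_load_acq_int(&th->th_generation);
 *		... copy the fields of interest from *th ...
 *		atomic_thread_fence_acq();
 *	} while (gen == 0 || gen != th->th_generation);
 *
 * gen == 0 means tc_windup() is mid-update; a changed generation means the
 * hands were recycled while copying, so the copy is simply retried.
 */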

#ifdef FFCLOCK
void
fbclock_binuptime(struct bintime *bt)
{

	GETTHBINTIME(bt, th_offset);
}

void
fbclock_nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	fbclock_binuptime(&bt);
	bintime2timespec(&bt, tsp);
}

void
fbclock_microuptime(struct timeval *tvp)
{
	struct bintime bt;

	fbclock_binuptime(&bt);
	bintime2timeval(&bt, tvp);
}

void
fbclock_bintime(struct bintime *bt)
{

	GETTHBINTIME(bt, th_bintime);
}

void
fbclock_nanotime(struct timespec *tsp)
{
	struct bintime bt;

	fbclock_bintime(&bt);
	bintime2timespec(&bt, tsp);
}

void
fbclock_microtime(struct timeval *tvp)
{
	struct bintime bt;

	fbclock_bintime(&bt);
	bintime2timeval(&bt, tvp);
}

void
fbclock_getbinuptime(struct bintime *bt)
{

	GETTHMEMBER(bt, th_offset);
}

void
fbclock_getnanouptime(struct timespec *tsp)
{
	struct bintime bt;

	GETTHMEMBER(&bt, th_offset);
	bintime2timespec(&bt, tsp);
}

void
fbclock_getmicrouptime(struct timeval *tvp)
{
	struct bintime bt;

	GETTHMEMBER(&bt, th_offset);
	bintime2timeval(&bt, tvp);
}

void
fbclock_getbintime(struct bintime *bt)
{

	GETTHMEMBER(bt, th_bintime);
}

void
fbclock_getnanotime(struct timespec *tsp)
{

	GETTHMEMBER(tsp, th_nanotime);
}

void
fbclock_getmicrotime(struct timeval *tvp)
{

	GETTHMEMBER(tvp, th_microtime);
}
#else /* !FFCLOCK */

void
binuptime(struct bintime *bt)
{

	GETTHBINTIME(bt, th_offset);
}

void
nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	binuptime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct bintime bt;

	binuptime(&bt);
	bintime2timeval(&bt, tvp);
}

void
bintime(struct bintime *bt)
{

	GETTHBINTIME(bt, th_bintime);
}

void
nanotime(struct timespec *tsp)
{
	struct bintime bt;

	bintime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microtime(struct timeval *tvp)
{
	struct bintime bt;

	bintime(&bt);
	bintime2timeval(&bt, tvp);
}

void
getbinuptime(struct bintime *bt)
{

	GETTHMEMBER(bt, th_offset);
}

void
getnanouptime(struct timespec *tsp)
{
	struct bintime bt;

	GETTHMEMBER(&bt, th_offset);
	bintime2timespec(&bt, tsp);
}

void
getmicrouptime(struct timeval *tvp)
{
	struct bintime bt;

	GETTHMEMBER(&bt, th_offset);
	bintime2timeval(&bt, tvp);
}

void
getbintime(struct bintime *bt)
{

	GETTHMEMBER(bt, th_bintime);
}

void
getnanotime(struct timespec *tsp)
{

	GETTHMEMBER(tsp, th_nanotime);
}

void
getmicrotime(struct timeval *tvp)
{

	GETTHMEMBER(tvp, th_microtime);
}
#endif /* FFCLOCK */

void
getboottime(struct timeval *boottime)
{
	struct bintime boottimebin;

	getboottimebin(&boottimebin);
	bintime2timeval(&boottimebin, boottime);
}

void
getboottimebin(struct bintime *boottimebin)
{

	GETTHMEMBER(boottimebin, th_boottime);
}

#ifdef FFCLOCK
/*
 * Support for feed-forward synchronization algorithms. This is heavily
 * inspired by the timehands mechanism but kept independent from it.
 * *_windup() functions have some connection to avoid accessing the
 * timecounter hardware more than necessary.
 */

/* Feed-forward clock estimates kept updated by the synchronization daemon. */
struct ffclock_estimate ffclock_estimate;
struct bintime ffclock_boottime;	/* Feed-forward boot time estimate. */
uint32_t ffclock_status;		/* Feed-forward clock status. */
int8_t ffclock_updated;			/* New estimates are available. */
struct mtx ffclock_mtx;			/* Mutex on ffclock_estimate. */

struct fftimehands {
	struct ffclock_estimate	cest;
	struct bintime		tick_time;
	struct bintime		tick_time_lerp;
	ffcounter		tick_ffcount;
	uint64_t		period_lerp;
	volatile uint8_t	gen;
	struct fftimehands	*next;
};

#define	NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x))

static struct fftimehands ffth[10];
static struct fftimehands *volatile fftimehands = ffth;

static void
ffclock_init(void)
{
	struct fftimehands *cur;
	struct fftimehands *last;

	memset(ffth, 0, sizeof(ffth));

	last = ffth + NUM_ELEMENTS(ffth) - 1;
	for (cur = ffth; cur < last; cur++)
		cur->next = cur + 1;
	last->next = ffth;

	ffclock_updated = 0;
	ffclock_status = FFCLOCK_STA_UNSYNC;
	mtx_init(&ffclock_mtx, "ffclock lock", NULL, MTX_DEF);
}

/*
 * Reset the feed-forward clock estimates. Called from inittodr() to get
 * things kick-started and uses the timecounter nominal frequency as a first
 * period estimate. Note: this function may be called several times just
 * after boot. Note: this is the only function that sets the value of boot
 * time for the monotonic (i.e. uptime) version of the feed-forward clock.
 */
void
ffclock_reset_clock(struct timespec *ts)
{
	struct timecounter *tc;
	struct ffclock_estimate cest;

	tc = timehands->th_counter;
	memset(&cest, 0, sizeof(struct ffclock_estimate));

	timespec2bintime(ts, &ffclock_boottime);
	timespec2bintime(ts, &(cest.update_time));
	ffclock_read_counter(&cest.update_ffcount);
	cest.leapsec_next = 0;
	cest.period = ((1ULL << 63) / tc->tc_frequency) << 1;
	cest.errb_abs = 0;
	cest.errb_rate = 0;
	cest.status = FFCLOCK_STA_UNSYNC;
	cest.leapsec_total = 0;
	cest.leapsec = 0;

	mtx_lock(&ffclock_mtx);
	bcopy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate));
	ffclock_updated = INT8_MAX;
	mtx_unlock(&ffclock_mtx);

	printf("ffclock reset: %s (%llu Hz), time = %ld.%09lu\n", tc->tc_name,
	    (unsigned long long)tc->tc_frequency, (long)ts->tv_sec,
	    (unsigned long)ts->tv_nsec);
}

/*
 * Sub-routine to convert a time interval measured in RAW counter units to
 * time in seconds stored in bintime format.
 * NOTE: bintime_mul requires u_int, but the value of the ffcounter may be
 * larger than the max value of u_int (on 32 bit architectures). Loop to
 * consume extra cycles.
 */
static void
ffclock_convert_delta(ffcounter ffdelta, uint64_t period, struct bintime *bt)
{
	struct bintime bt2;
	ffcounter delta, delta_max;

	delta_max = (1ULL << (8 * sizeof(unsigned int))) - 1;
	bintime_clear(bt);
	do {
		if (ffdelta > delta_max)
			delta = delta_max;
		else
			delta = ffdelta;
		bt2.sec = 0;
		bt2.frac = period;
		bintime_mul(&bt2, (unsigned int)delta);
		bintime_add(bt, &bt2);
		ffdelta -= delta;
	} while (ffdelta > 0);
}
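
/*
 * Editor's worked example (hypothetical 1 MHz counter):
 * ffclock_reset_clock() sets cest.period = ((1ULL << 63) / 1000000) << 1,
 * which is 2^64 / 10^6 rounded down to an even value, i.e. one microsecond
 * expressed in 2^-64 fractions of a second. ffclock_convert_delta() then
 * turns a raw counter delta into bintime by accumulating period * delta in
 * u_int sized chunks.
 */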

/*
 * Update the fftimehands.
 * Push the tick ffcount and time(s) forward based on the current clock
 * estimate. The conversion from ffcounter to bintime relies on the
 * difference clock principle, whose accuracy relies on computing small time
 * intervals. If a new clock estimate has been passed by the synchronisation
 * daemon, make it current, and compute the linear interpolation for
 * monotonic time if needed.
 */
static void
ffclock_windup(unsigned int delta)
{
	struct ffclock_estimate *cest;
	struct fftimehands *ffth;
	struct bintime bt, gap_lerp;
	ffcounter ffdelta;
	uint64_t frac;
	unsigned int polling;
	uint8_t forward_jump, ogen;

	/*
	 * Pick the next timehand, copy current ffclock estimates and move
	 * tick times and counter forward.
	 */
	forward_jump = 0;
	ffth = fftimehands->next;
	ogen = ffth->gen;
	ffth->gen = 0;
	cest = &ffth->cest;
	bcopy(&fftimehands->cest, cest, sizeof(struct ffclock_estimate));
	ffdelta = (ffcounter)delta;
	ffth->period_lerp = fftimehands->period_lerp;

	ffth->tick_time = fftimehands->tick_time;
	ffclock_convert_delta(ffdelta, cest->period, &bt);
	bintime_add(&ffth->tick_time, &bt);

	ffth->tick_time_lerp = fftimehands->tick_time_lerp;
	ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt);
	bintime_add(&ffth->tick_time_lerp, &bt);

	ffth->tick_ffcount = fftimehands->tick_ffcount + ffdelta;

	/*
	 * Assess the status of the clock: if the last update is too old, it
	 * is likely the synchronisation daemon is dead and the clock is
	 * free running.
	 */
	if (ffclock_updated == 0) {
		ffdelta = ffth->tick_ffcount - cest->update_ffcount;
		ffclock_convert_delta(ffdelta, cest->period, &bt);
		if (bt.sec > 2 * FFCLOCK_SKM_SCALE)
			ffclock_status |= FFCLOCK_STA_UNSYNC;
	}

	/*
	 * If available, grab updated clock estimates and make them current.
	 * Recompute time at this tick using the updated estimates. The
	 * clock estimates passed in by the feed-forward synchronisation
	 * daemon may result in a time conversion that is not monotonically
	 * increasing (just after the update). time_lerp is a particular
	 * linear interpolation over the synchronisation algo polling period
	 * that ensures monotonicity for the clock ids requesting it.
	 */
	if (ffclock_updated > 0) {
		bcopy(&ffclock_estimate, cest, sizeof(struct ffclock_estimate));
		ffdelta = ffth->tick_ffcount - cest->update_ffcount;
		ffth->tick_time = cest->update_time;
		ffclock_convert_delta(ffdelta, cest->period, &bt);
		bintime_add(&ffth->tick_time, &bt);

		/* ffclock_reset sets ffclock_updated to INT8_MAX */
		if (ffclock_updated == INT8_MAX)
			ffth->tick_time_lerp = ffth->tick_time;

		if (bintime_cmp(&ffth->tick_time, &ffth->tick_time_lerp, >))
			forward_jump = 1;
		else
			forward_jump = 0;

		bintime_clear(&gap_lerp);
		if (forward_jump) {
			gap_lerp = ffth->tick_time;
			bintime_sub(&gap_lerp, &ffth->tick_time_lerp);
		} else {
			gap_lerp = ffth->tick_time_lerp;
			bintime_sub(&gap_lerp, &ffth->tick_time);
		}

		/*
		 * The reset from the RTC clock may be far from accurate, and
		 * reducing the gap between real time and interpolated time
		 * could take a very long time if the interpolated clock
		 * insists on strict monotonicity. The clock is reset under
		 * very strict conditions (kernel time is known to be wrong
		 * and the synchronization daemon has been restarted
		 * recently). ffclock_boottime absorbs the jump to ensure
		 * boot time is correct and uptime functions stay consistent.
		 */
		if (((ffclock_status & FFCLOCK_STA_UNSYNC) == FFCLOCK_STA_UNSYNC) &&
		    ((cest->status & FFCLOCK_STA_UNSYNC) == 0) &&
		    ((cest->status & FFCLOCK_STA_WARMUP) == FFCLOCK_STA_WARMUP)) {
			if (forward_jump)
				bintime_add(&ffclock_boottime, &gap_lerp);
			else
				bintime_sub(&ffclock_boottime, &gap_lerp);
			ffth->tick_time_lerp = ffth->tick_time;
			bintime_clear(&gap_lerp);
		}

		ffclock_status = cest->status;
		ffth->period_lerp = cest->period;

		/*
		 * Compute the corrected period used for the linear
		 * interpolation of time. The rate of linear interpolation
		 * is capped to 5000PPM (5ms/s).
		 */
		if (bintime_isset(&gap_lerp)) {
			ffdelta = cest->update_ffcount;
			ffdelta -= fftimehands->cest.update_ffcount;
			ffclock_convert_delta(ffdelta, cest->period, &bt);
			polling = bt.sec;
			bt.sec = 0;
			bt.frac = 5000000 * (uint64_t)18446744073LL;
			bintime_mul(&bt, polling);
			if (bintime_cmp(&gap_lerp, &bt, >))
				gap_lerp = bt;

			/* Approximate 1 sec by 1-(1/2^64) to ease arithmetic */
			frac = 0;
			if (gap_lerp.sec > 0) {
				frac -= 1;
				frac /= ffdelta / gap_lerp.sec;
			}
			frac += gap_lerp.frac / ffdelta;

			if (forward_jump)
				ffth->period_lerp += frac;
			else
				ffth->period_lerp -= frac;
		}

		ffclock_updated = 0;
	}
	if (++ogen == 0)
		ogen = 1;
	ffth->gen = ogen;
	fftimehands = ffth;
}
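
/*
 * Editor's arithmetic note for the interpolation cap above: 18446744073 is
 * approximately 2^64 / 10^9, so bt.frac = 5000000 * 18446744073 encodes
 * 5 ms in 2^-64 s units. Scaled by the polling interval in seconds, it
 * bounds |gap_lerp| to 5 ms of correction per second, i.e. the 5000PPM cap.
 */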

/*
 * Adjust the fftimehands when the timecounter is changed. Stating the
 * obvious, the old and new hardware counter cannot be read simultaneously.
 * tc_windup() does read the two counters 'back to back', but a few cycles
 * are effectively lost, and not accumulated in tick_ffcount. This is a
 * fairly radical operation for a feed-forward synchronization daemon, and
 * it is its job not to push irrelevant data to the kernel. Because there
 * is no locking here, we simply force the pending or next update to be
 * ignored, giving the daemon a chance to realize the counter has changed.
 */
static void
ffclock_change_tc(struct timehands *th)
{
	struct fftimehands *ffth;
	struct ffclock_estimate *cest;
	struct timecounter *tc;
	uint8_t ogen;

	tc = th->th_counter;
	ffth = fftimehands->next;
	ogen = ffth->gen;
	ffth->gen = 0;

	cest = &ffth->cest;
	bcopy(&(fftimehands->cest), cest, sizeof(struct ffclock_estimate));
	cest->period = ((1ULL << 63) / tc->tc_frequency) << 1;
	cest->errb_abs = 0;
	cest->errb_rate = 0;
	cest->status |= FFCLOCK_STA_UNSYNC;

	ffth->tick_ffcount = fftimehands->tick_ffcount;
	ffth->tick_time_lerp = fftimehands->tick_time_lerp;
	ffth->tick_time = fftimehands->tick_time;
	ffth->period_lerp = cest->period;

	/* Do not lock but ignore next update from synchronization daemon. */
	ffclock_updated--;

	if (++ogen == 0)
		ogen = 1;
	ffth->gen = ogen;
	fftimehands = ffth;
}

/*
 * Retrieve feed-forward counter and time of last kernel tick.
 */
void
ffclock_last_tick(ffcounter *ffcount, struct bintime *bt, uint32_t flags)
{
	struct fftimehands *ffth;
	uint8_t gen;

	/*
	 * No locking but check generation has not changed. Also need to
	 * make sure ffdelta is positive, i.e. ffcount > tick_ffcount.
	 */
	do {
		ffth = fftimehands;
		gen = ffth->gen;
		if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP)
			*bt = ffth->tick_time_lerp;
		else
			*bt = ffth->tick_time;
		*ffcount = ffth->tick_ffcount;
	} while (gen == 0 || gen != ffth->gen);
}

/*
 * Absolute clock conversion. Low level function to convert ffcounter to
 * bintime. The ffcounter is converted using the current ffclock period
 * estimate or the "interpolated period" to ensure monotonicity.
 * NOTE: this conversion may have been deferred, and the clock updated since
 * the hardware counter has been read.
 */
void
ffclock_convert_abs(ffcounter ffcount, struct bintime *bt, uint32_t flags)
{
	struct fftimehands *ffth;
	struct bintime bt2;
	ffcounter ffdelta;
	uint8_t gen;

	/*
	 * No locking but check generation has not changed. Also need to
	 * make sure ffdelta is positive, i.e. ffcount > tick_ffcount.
	 */
	do {
		ffth = fftimehands;
		gen = ffth->gen;
		if (ffcount > ffth->tick_ffcount)
			ffdelta = ffcount - ffth->tick_ffcount;
		else
			ffdelta = ffth->tick_ffcount - ffcount;

		if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP) {
			*bt = ffth->tick_time_lerp;
			ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt2);
		} else {
			*bt = ffth->tick_time;
			ffclock_convert_delta(ffdelta, ffth->cest.period, &bt2);
		}

		if (ffcount > ffth->tick_ffcount)
			bintime_add(bt, &bt2);
		else
			bintime_sub(bt, &bt2);
	} while (gen == 0 || gen != ffth->gen);
}

/*
 * Difference clock conversion.
 * Low level function to convert a time interval measured in RAW counter
 * units into bintime. The difference clock allows measuring small intervals
 * much more reliably than the absolute clock.
 */
void
ffclock_convert_diff(ffcounter ffdelta, struct bintime *bt)
{
	struct fftimehands *ffth;
	uint8_t gen;

	/* No locking but check generation has not changed. */
	do {
		ffth = fftimehands;
		gen = ffth->gen;
		ffclock_convert_delta(ffdelta, ffth->cest.period, bt);
	} while (gen == 0 || gen != ffth->gen);
}
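
/*
 * Editor's usage sketch for the difference clock (illustration only,
 * hypothetical snippet): to time a short code section, read the counter
 * twice and convert only the delta,
 *
 *	ffcounter a, b;
 *	struct bintime bt;
 *
 *	ffclock_read_counter(&a);
 *	... code under test ...
 *	ffclock_read_counter(&b);
 *	ffclock_convert_diff(b - a, &bt);
 *
 * Small intervals sidestep the absolute clock's accumulated error, which is
 * the difference clock principle mentioned above.
 */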

/*
 * Access to current ffcounter value.
 */
void
ffclock_read_counter(ffcounter *ffcount)
{
	struct timehands *th;
	struct fftimehands *ffth;
	unsigned int gen, delta;

	/*
	 * ffclock_windup() is called from tc_windup(), so it is safe to
	 * rely on th->th_generation only, for a correct delta and ffcounter.
	 */
	do {
		th = timehands;
		gen = atomic_load_acq_int(&th->th_generation);
		ffth = fftimehands;
		delta = tc_delta(th);
		*ffcount = ffth->tick_ffcount;
		atomic_thread_fence_acq();
	} while (gen == 0 || gen != th->th_generation);

	*ffcount += delta;
}

void
binuptime(struct bintime *bt)
{

	binuptime_fromclock(bt, sysclock_active);
}

void
nanouptime(struct timespec *tsp)
{

	nanouptime_fromclock(tsp, sysclock_active);
}

void
microuptime(struct timeval *tvp)
{

	microuptime_fromclock(tvp, sysclock_active);
}

void
bintime(struct bintime *bt)
{

	bintime_fromclock(bt, sysclock_active);
}

void
nanotime(struct timespec *tsp)
{

	nanotime_fromclock(tsp, sysclock_active);
}

void
microtime(struct timeval *tvp)
{

	microtime_fromclock(tvp, sysclock_active);
}

void
getbinuptime(struct bintime *bt)
{

	getbinuptime_fromclock(bt, sysclock_active);
}

void
getnanouptime(struct timespec *tsp)
{

	getnanouptime_fromclock(tsp, sysclock_active);
}

void
getmicrouptime(struct timeval *tvp)
{

	getmicrouptime_fromclock(tvp, sysclock_active);
}

void
getbintime(struct bintime *bt)
{

	getbintime_fromclock(bt, sysclock_active);
}

void
getnanotime(struct timespec *tsp)
{

	getnanotime_fromclock(tsp, sysclock_active);
}

void
getmicrotime(struct timeval *tvp)
{

	getmicrotime_fromclock(tvp, sysclock_active);
}

#endif /* FFCLOCK */

/*
 * This is a clone of getnanotime and used for walltimestamps.
 * The dtrace_ prefix prevents fbt from creating probes for
 * it so walltimestamp can be safely used in all fbt probes.
 */
void
dtrace_getnanotime(struct timespec *tsp)
{

	GETTHMEMBER(tsp, th_nanotime);
}

/*
 * This is a clone of getnanouptime used for time since boot.
 * The dtrace_ prefix prevents fbt from creating probes for
 * it so uptime can be safely used in all fbt probes.
 */
void
dtrace_getnanouptime(struct timespec *tsp)
{
	struct bintime bt;

	GETTHMEMBER(&bt, th_offset);
	bintime2timespec(&bt, tsp);
}

/*
 * System clock currently providing time to the system. Modifiable via sysctl
 * when the FFCLOCK option is defined.
 */
int sysclock_active = SYSCLOCK_FBCK;

/* Internal NTP status and error estimates. */
extern int time_status;
extern long time_esterror;

/*
 * Take a snapshot of sysclock data which can be used to compare system
 * clocks and generate timestamps after the fact.
 */
void
sysclock_getsnapshot(struct sysclock_snap *clock_snap, int fast)
{
	struct fbclock_info *fbi;
	struct timehands *th;
	struct bintime bt;
	unsigned int delta, gen;
#ifdef FFCLOCK
	ffcounter ffcount;
	struct fftimehands *ffth;
	struct ffclock_info *ffi;
	struct ffclock_estimate cest;

	ffi = &clock_snap->ff_info;
#endif

	fbi = &clock_snap->fb_info;
	delta = 0;

	do {
		th = timehands;
		gen = atomic_load_acq_int(&th->th_generation);
		fbi->th_scale = th->th_scale;
		fbi->tick_time = th->th_offset;
#ifdef FFCLOCK
		ffth = fftimehands;
		ffi->tick_time = ffth->tick_time_lerp;
		ffi->tick_time_lerp = ffth->tick_time_lerp;
		ffi->period = ffth->cest.period;
		ffi->period_lerp = ffth->period_lerp;
		clock_snap->ffcount = ffth->tick_ffcount;
		cest = ffth->cest;
#endif
		if (!fast)
			delta = tc_delta(th);
		atomic_thread_fence_acq();
	} while (gen == 0 || gen != th->th_generation);

	clock_snap->delta = delta;
	clock_snap->sysclock_active = sysclock_active;

	/* Record feedback clock status and error. */
	clock_snap->fb_info.status = time_status;
	/* XXX: Very crude estimate of feedback clock error. */
	bt.sec = time_esterror / 1000000;
	bt.frac = ((time_esterror - bt.sec) * 1000000) *
	    (uint64_t)18446744073709ULL;
	clock_snap->fb_info.error = bt;

#ifdef FFCLOCK
	if (!fast)
		clock_snap->ffcount += delta;

	/* Record feed-forward clock leap second adjustment. */
	ffi->leapsec_adjustment = cest.leapsec_total;
	if (clock_snap->ffcount > cest.leapsec_next)
		ffi->leapsec_adjustment -= cest.leapsec;

	/* Record feed-forward clock status and error. */
	clock_snap->ff_info.status = cest.status;
	ffcount = clock_snap->ffcount - cest.update_ffcount;
	ffclock_convert_delta(ffcount, cest.period, &bt);
	/* 18446744073709 = int(2^64/1e12), err_bound_rate in [ps/s]. */
	bintime_mul(&bt, cest.errb_rate * (uint64_t)18446744073709ULL);
	/* 18446744073 = int(2^64 / 1e9), since err_abs in [ns]. */
	bintime_addx(&bt, cest.errb_abs * (uint64_t)18446744073ULL);
	clock_snap->ff_info.error = bt;
#endif
}
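
/*
 * Editor's note on the conversion constants above: bintime fractions are in
 * units of 2^-64 s, so a nanosecond quantity is scaled by 2^64 / 10^9 ~=
 * 18446744073 and a ps/s rate by 2^64 / 10^12 ~= 18446744073709. As a
 * hypothetical check, errb_abs = 500 ns gives 500 * 18446744073 ~= 9.2e12
 * fraction units, which is 9.2e12 / 2^64 = 0.0000005 s.
 */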

/*
 * Convert a sysclock snapshot into a struct bintime based on the specified
 * clock source and flags.
 */
int
sysclock_snap2bintime(struct sysclock_snap *cs, struct bintime *bt,
    int whichclock, uint32_t flags)
{
	struct bintime boottimebin;
#ifdef FFCLOCK
	struct bintime bt2;
	uint64_t period;
#endif

	switch (whichclock) {
	case SYSCLOCK_FBCK:
		*bt = cs->fb_info.tick_time;

		/* If snapshot was created with !fast, delta will be >0. */
		if (cs->delta > 0)
			bintime_addx(bt, cs->fb_info.th_scale * cs->delta);

		if ((flags & FBCLOCK_UPTIME) == 0) {
			getboottimebin(&boottimebin);
			bintime_add(bt, &boottimebin);
		}
		break;
#ifdef FFCLOCK
	case SYSCLOCK_FFWD:
		if (flags & FFCLOCK_LERP) {
			*bt = cs->ff_info.tick_time_lerp;
			period = cs->ff_info.period_lerp;
		} else {
			*bt = cs->ff_info.tick_time;
			period = cs->ff_info.period;
		}

		/* If snapshot was created with !fast, delta will be >0. */
		if (cs->delta > 0) {
			ffclock_convert_delta(cs->delta, period, &bt2);
			bintime_add(bt, &bt2);
		}

		/* Leap second adjustment. */
		if (flags & FFCLOCK_LEAPSEC)
			bt->sec -= cs->ff_info.leapsec_adjustment;

		/* Boot time adjustment, for uptime/monotonic clocks. */
		if (flags & FFCLOCK_UPTIME)
			bintime_sub(bt, &ffclock_boottime);
		break;
#endif
	default:
		return (EINVAL);
		break;
	}

	return (0);
}

/*
 * Initialize a new timecounter and possibly use it.
 */
void
tc_init(struct timecounter *tc)
{
	u_int u;
	struct sysctl_oid *tc_root;

	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (u > hz && tc->tc_quality >= 0) {
		tc->tc_quality = -2000;
		if (bootverbose) {
			printf("Timecounter \"%s\" frequency %ju Hz",
			    tc->tc_name, (uintmax_t)tc->tc_frequency);
			printf(" -- Insufficient hz, needs at least %u\n", u);
		}
	} else if (tc->tc_quality >= 0 || bootverbose) {
		printf("Timecounter \"%s\" frequency %ju Hz quality %d\n",
		    tc->tc_name, (uintmax_t)tc->tc_frequency,
		    tc->tc_quality);
	}

	tc->tc_next = timecounters;
	timecounters = tc;
	/*
	 * Set up sysctl tree for this counter.
	 */
	tc_root = SYSCTL_ADD_NODE_WITH_LABEL(NULL,
	    SYSCTL_STATIC_CHILDREN(_kern_timecounter_tc), OID_AUTO, tc->tc_name,
	    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
	    "timecounter description", "timecounter");
	SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
	    "mask", CTLFLAG_RD, &(tc->tc_counter_mask), 0,
	    "mask for implemented bits");
	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
	    "counter", CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, tc,
	    sizeof(*tc), sysctl_kern_timecounter_get, "IU",
	    "current timecounter value");
	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
	    "frequency", CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, tc,
	    sizeof(*tc), sysctl_kern_timecounter_freq, "QU",
	    "timecounter frequency");
	SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
	    "quality", CTLFLAG_RD, &(tc->tc_quality), 0,
	    "goodness of time counter");
	/*
	 * Do not automatically switch if the current tc was specifically
	 * chosen. Never automatically use a timecounter with negative
	 * quality. Even though we run on the dummy counter, switching here
	 * may be worse since this timecounter may not be monotonic.
	 */
	if (tc_chosen)
		return;
	if (tc->tc_quality < 0)
		return;
	if (tc_from_tunable[0] != '\0' &&
	    strcmp(tc->tc_name, tc_from_tunable) == 0) {
		tc_chosen = 1;
		tc_from_tunable[0] = '\0';
	} else {
		if (tc->tc_quality < timecounter->tc_quality)
			return;
		if (tc->tc_quality == timecounter->tc_quality &&
		    tc->tc_frequency < timecounter->tc_frequency)
			return;
	}
	(void)tc->tc_get_timecount(tc);
	timecounter = tc;
}
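
/*
 * Editor's worked example for the hz check above (hypothetical hardware):
 * a 16-bit counter (tc_counter_mask 0xffff) running at 3579545 Hz wraps
 * about 3579545 / 65535 ~= 54 times per second; with the 10% margin this
 * demands hz of roughly 60 or more, otherwise tc_windup() could miss a
 * whole wrap and the counter is demoted to quality -2000.
 */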

/* Report the frequency of the current timecounter. */
uint64_t
tc_getfrequency(void)
{

	return (timehands->th_counter->tc_frequency);
}

static bool
sleeping_on_old_rtc(struct thread *td)
{

	/*
	 * td_rtcgen is modified by curthread when it is running,
	 * and by other threads in this function. By finding the thread
	 * on a sleepqueue and holding the lock on the sleepqueue
	 * chain, we guarantee that the thread is not running and that
	 * modifying td_rtcgen is safe. Setting td_rtcgen to zero informs
	 * the thread that it was woken due to a real-time clock adjustment.
	 * (The declaration of td_rtcgen refers to this comment.)
	 */
	if (td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation) {
		td->td_rtcgen = 0;
		return (true);
	}
	return (false);
}

static struct mtx tc_setclock_mtx;
MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN);

/*
 * Step our concept of UTC. This is done by modifying our estimate of
 * when we booted.
 */
void
tc_setclock(struct timespec *ts)
{
	struct timespec tbef, taft;
	struct bintime bt, bt2;

	timespec2bintime(ts, &bt);
	nanotime(&tbef);
	mtx_lock_spin(&tc_setclock_mtx);
	cpu_tick_calibrate(1);
	binuptime(&bt2);
	bintime_sub(&bt, &bt2);

	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup(&bt);
	mtx_unlock_spin(&tc_setclock_mtx);

	/* Avoid rtc_generation == 0, since td_rtcgen == 0 is special. */
	atomic_add_rel_int(&rtc_generation, 2);
	sleepq_chains_remove_matching(sleeping_on_old_rtc);
	if (timestepwarnings) {
		nanotime(&taft);
		log(LOG_INFO,
		    "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n",
		    (intmax_t)tbef.tv_sec, tbef.tv_nsec,
		    (intmax_t)taft.tv_sec, taft.tv_nsec,
		    (intmax_t)ts->tv_sec, ts->tv_nsec);
	}
}
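
/*
 * Editor's note: the step works because UTC is always derived as
 * th_offset (uptime) + th_boottime. tc_setclock() therefore computes the
 * new boot time as the requested UTC minus the current uptime and passes
 * it to tc_windup(); uptime itself never jumps.
 */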

/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands. Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP. Slightly magic.
 */
static void
tc_windup(struct bintime *new_boottimebin)
{
	struct bintime bt;
	struct timehands *th, *tho;
	uint64_t scale;
	u_int delta, ncount, ogen;
	int i;
	time_t t;

	/*
	 * Make the next timehands a copy of the current one, but do
	 * not overwrite the generation or next pointer. While we
	 * update the contents, the generation must be zero. We need
	 * to ensure that the zero generation is visible before the
	 * data updates become visible, which requires a release fence.
	 * For similar reasons, re-reading of the generation after the
	 * data is read should use an acquire fence.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	atomic_thread_fence_rel();
	memcpy(th, tho, offsetof(struct timehands, th_generation));
	if (new_boottimebin != NULL)
		th->th_boottime = *new_boottimebin;

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
#ifdef FFCLOCK
	ffclock_windup(delta);
#endif
	th->th_offset_count += delta;
	th->th_offset_count &= th->th_counter->tc_counter_mask;
	while (delta > th->th_counter->tc_frequency) {
		/* Eat complete unadjusted seconds. */
		delta -= th->th_counter->tc_frequency;
		th->th_offset.sec++;
	}
	if ((delta > th->th_counter->tc_frequency / 2) &&
	    (th->th_scale * delta < ((uint64_t)1 << 63))) {
		/* The product th_scale * delta just barely overflows. */
		th->th_offset.sec++;
	}
	bintime_addx(&th->th_offset, th->th_scale * delta);

	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them. There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which
	 * might have a different rate than the current NTP second. It
	 * doesn't matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);

	/*
	 * Deal with NTP second processing. The for loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if timeouts are not run for several seconds.
	 * At boot, the time step can be large when the TOD hardware
	 * has been read, so on really large steps, we call
	 * ntp_update_second only twice. We need to call it twice in
	 * case we missed a leap second.
	 */
	bt = th->th_offset;
	bintime_add(&bt, &th->th_boottime);
	i = bt.sec - tho->th_microtime.tv_sec;
	if (i > LARGE_STEP)
		i = 2;
	for (; i > 0; i--) {
		t = bt.sec;
		ntp_update_second(&th->th_adjustment, &bt.sec);
		if (bt.sec != t)
			th->th_boottime.sec += bt.sec - t;
	}
	/* Update the UTC timestamps used by the get*() functions. */
	th->th_bintime = bt;
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);

	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
#ifndef __arm__
		if ((timecounter->tc_flags & TC_FLAGS_C2STOP) != 0)
			cpu_disable_c2_sleep++;
		if ((th->th_counter->tc_flags & TC_FLAGS_C2STOP) != 0)
			cpu_disable_c2_sleep--;
#endif
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
		tc_min_ticktock_freq = max(1, timecounter->tc_frequency /
		    (((uint64_t)timecounter->tc_counter_mask + 1) / 3));
#ifdef FFCLOCK
		ffclock_change_tc(th);
#endif
	}

	/*-
	 * Recalculate the scaling factor. We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing; that
	 * leaves no suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment. On a
	 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 */
	scale = (uint64_t)1 << 63;
	scale += (th->th_adjustment / 1024) * 2199;
	scale /= th->th_counter->tc_frequency;
	th->th_scale = scale * 2;
	th->th_large_delta = MIN(((uint64_t)1 << 63) / scale, UINT_MAX);
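
	/*
	 * Editor's arithmetic check of the comment above: 2199 / 512 =
	 * 4.29492..., versus the exact 2^32 / 10^9 = 4.29496...; the
	 * shortfall is about 10.6PPM of th_adjustment, i.e. roughly
	 * 0.05PPM of absolute rate at the +/-5000PPM extreme, as stated.
	 */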

	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.
	 */
	if (++ogen == 0)
		ogen = 1;
	atomic_store_rel_int(&th->th_generation, ogen);

	/* Go live with the new struct timehands. */
#ifdef FFCLOCK
	switch (sysclock_active) {
	case SYSCLOCK_FBCK:
#endif
		time_second = th->th_microtime.tv_sec;
		time_uptime = th->th_offset.sec;
#ifdef FFCLOCK
		break;
	case SYSCLOCK_FFWD:
		time_second = fftimehands->tick_time_lerp.sec;
		time_uptime = fftimehands->tick_time_lerp.sec -
		    ffclock_boottime.sec;
		break;
	}
#endif

	timehands = th;
	timekeep_push_vdso();
}

/* Report or change the active timecounter hardware. */
static int
sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS)
{
	char newname[32];
	struct timecounter *newtc, *tc;
	int error;

	tc = timecounter;
	strlcpy(newname, tc->tc_name, sizeof(newname));

	error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	/* Record that the tc in use now was specifically chosen. */
	tc_chosen = 1;
	if (strcmp(newname, tc->tc_name) == 0)
		return (0);
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;

		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);

		timecounter = newtc;

		/*
		 * The vdso timehands update is deferred until the next
		 * 'tc_windup()'.
		 *
		 * This is prudent given that 'timekeep_push_vdso()' does not
		 * use any locking and that it can be called in hard interrupt
		 * context via 'tc_windup()'.
		 */
		return (0);
	}
	return (EINVAL);
}

SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware,
    CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, 0, 0,
    sysctl_kern_timecounter_hardware, "A",
    "Timecounter hardware selected");

/* Report the available timecounter hardware. */
static int
sysctl_kern_timecounter_choice(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sb;
	struct timecounter *tc;
	int error;

	sbuf_new_for_sysctl(&sb, NULL, 0, req);
	for (tc = timecounters; tc != NULL; tc = tc->tc_next) {
		if (tc != timecounters)
			sbuf_putc(&sb, ' ');
		sbuf_printf(&sb, "%s(%d)", tc->tc_name, tc->tc_quality);
	}
	error = sbuf_finish(&sb);
	sbuf_delete(&sb);
	return (error);
}

SYSCTL_PROC(_kern_timecounter, OID_AUTO, choice,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
    sysctl_kern_timecounter_choice, "A",
    "Timecounter hardware detected");
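
/*
 * Editor's usage note (userland, hypothetical counter names and output):
 * the two handlers above back the familiar administration flow,
 *
 *	# sysctl kern.timecounter.choice
 *	kern.timecounter.choice: TSC-low(1000) HPET(950) ACPI-fast(900)
 *	# sysctl kern.timecounter.hardware=HPET
 *
 * Selecting a counter by hand sets tc_chosen, which disables the automatic
 * quality-based switching in tc_init().
 */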

/*
 * RFC 2783 PPS-API implementation.
 */

/*
 * Return true if the driver is aware of the abi version extensions in the
 * pps_state structure, and it supports at least the given abi version number.
 */
static inline int
abi_aware(struct pps_state *pps, int vers)
{

	return ((pps->kcmode & KCMODE_ABIFLAG) && pps->driver_abi >= vers);
}

static int
pps_fetch(struct pps_fetch_args *fapi, struct pps_state *pps)
{
	int err, timo;
	pps_seq_t aseq, cseq;
	struct timeval tv;

	if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
		return (EINVAL);

	/*
	 * If no timeout is requested, immediately return whatever values were
	 * most recently captured. If timeout seconds is -1, that's a request
	 * to block without a timeout. WITNESS won't let us sleep forever
	 * without a lock (we really don't need a lock), so just repeatedly
	 * sleep a long time.
	 */
	if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) {
		if (fapi->timeout.tv_sec == -1)
			timo = 0x7fffffff;
		else {
			tv.tv_sec = fapi->timeout.tv_sec;
			tv.tv_usec = fapi->timeout.tv_nsec / 1000;
			timo = tvtohz(&tv);
		}
		aseq = atomic_load_int(&pps->ppsinfo.assert_sequence);
		cseq = atomic_load_int(&pps->ppsinfo.clear_sequence);
		while (aseq == atomic_load_int(&pps->ppsinfo.assert_sequence) &&
		    cseq == atomic_load_int(&pps->ppsinfo.clear_sequence)) {
			if (abi_aware(pps, 1) && pps->driver_mtx != NULL) {
				if (pps->flags & PPSFLAG_MTX_SPIN) {
					err = msleep_spin(pps, pps->driver_mtx,
					    "ppsfch", timo);
				} else {
					err = msleep(pps, pps->driver_mtx, PCATCH,
					    "ppsfch", timo);
				}
			} else {
				err = tsleep(pps, PCATCH, "ppsfch", timo);
			}
			if (err == EWOULDBLOCK) {
				if (fapi->timeout.tv_sec == -1) {
					continue;
				} else {
					return (ETIMEDOUT);
				}
			} else if (err != 0) {
				return (err);
			}
		}
	}

	pps->ppsinfo.current_mode = pps->ppsparam.mode;
	fapi->pps_info_buf = pps->ppsinfo;

	return (0);
}
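
/*
 * Editor's sketch of the consumer side (RFC 2783 userland API; the handle
 * and timeout values are hypothetical):
 *
 *	pps_info_t info;
 *	struct timespec timeout = { .tv_sec = 3, .tv_nsec = 0 };
 *
 *	time_pps_fetch(handle, PPS_TSFMT_TSPEC, &info, &timeout);
 *
 * That call reaches pps_fetch() above through the PPS_IOC_FETCH ioctl and
 * sleeps until assert_sequence or clear_sequence advances, or it times out.
 */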

int
pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
{
	pps_params_t *app;
	struct pps_fetch_args *fapi;
#ifdef FFCLOCK
	struct pps_fetch_ffc_args *fapi_ffc;
#endif
#ifdef PPS_SYNC
	struct pps_kcbind_args *kapi;
#endif

	KASSERT(pps != NULL, ("NULL pps pointer in pps_ioctl"));
	switch (cmd) {
	case PPS_IOC_CREATE:
		return (0);
	case PPS_IOC_DESTROY:
		return (0);
	case PPS_IOC_SETPARAMS:
		app = (pps_params_t *)data;
		if (app->mode & ~pps->ppscap)
			return (EINVAL);
#ifdef FFCLOCK
		/* Ensure only a single clock is selected for ffc timestamp. */
		if ((app->mode & PPS_TSCLK_MASK) == PPS_TSCLK_MASK)
			return (EINVAL);
#endif
		pps->ppsparam = *app;
		return (0);
	case PPS_IOC_GETPARAMS:
		app = (pps_params_t *)data;
		*app = pps->ppsparam;
		app->api_version = PPS_API_VERS_1;
		return (0);
	case PPS_IOC_GETCAP:
		*(int*)data = pps->ppscap;
		return (0);
	case PPS_IOC_FETCH:
		fapi = (struct pps_fetch_args *)data;
		return (pps_fetch(fapi, pps));
#ifdef FFCLOCK
	case PPS_IOC_FETCH_FFCOUNTER:
		fapi_ffc = (struct pps_fetch_ffc_args *)data;
		if (fapi_ffc->tsformat && fapi_ffc->tsformat !=
		    PPS_TSFMT_TSPEC)
			return (EINVAL);
		if (fapi_ffc->timeout.tv_sec || fapi_ffc->timeout.tv_nsec)
			return (EOPNOTSUPP);
		pps->ppsinfo_ffc.current_mode = pps->ppsparam.mode;
		fapi_ffc->pps_info_buf_ffc = pps->ppsinfo_ffc;
		/* Overwrite timestamps if feedback clock selected. */
		switch (pps->ppsparam.mode & PPS_TSCLK_MASK) {
		case PPS_TSCLK_FBCK:
			fapi_ffc->pps_info_buf_ffc.assert_timestamp =
			    pps->ppsinfo.assert_timestamp;
			fapi_ffc->pps_info_buf_ffc.clear_timestamp =
			    pps->ppsinfo.clear_timestamp;
			break;
		case PPS_TSCLK_FFWD:
			break;
		default:
			break;
		}
		return (0);
#endif /* FFCLOCK */
	case PPS_IOC_KCBIND:
#ifdef PPS_SYNC
		kapi = (struct pps_kcbind_args *)data;
		/* XXX Only root should be able to do this */
		if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
			return (EINVAL);
		if (kapi->kernel_consumer != PPS_KC_HARDPPS)
			return (EINVAL);
		if (kapi->edge & ~pps->ppscap)
			return (EINVAL);
		pps->kcmode = (kapi->edge & KCMODE_EDGEMASK) |
		    (pps->kcmode & KCMODE_ABIFLAG);
		return (0);
#else
		return (EOPNOTSUPP);
#endif
	default:
		return (ENOIOCTL);
	}
}

void
pps_init(struct pps_state *pps)
{
	pps->ppscap |= PPS_TSFMT_TSPEC | PPS_CANWAIT;
	if (pps->ppscap & PPS_CAPTUREASSERT)
		pps->ppscap |= PPS_OFFSETASSERT;
	if (pps->ppscap & PPS_CAPTURECLEAR)
		pps->ppscap |= PPS_OFFSETCLEAR;
#ifdef FFCLOCK
	pps->ppscap |= PPS_TSCLK_MASK;
#endif
	pps->kcmode &= ~KCMODE_ABIFLAG;
}

void
pps_init_abi(struct pps_state *pps)
{

	pps_init(pps);
	if (pps->driver_abi > 0) {
		pps->kcmode |= KCMODE_ABIFLAG;
		pps->kernel_abi = PPS_ABI_VERSION;
	}
}

void
pps_capture(struct pps_state *pps)
{
	struct timehands *th;

	KASSERT(pps != NULL, ("NULL pps pointer in pps_capture"));
	th = timehands;
	pps->capgen = atomic_load_acq_int(&th->th_generation);
	pps->capth = th;
#ifdef FFCLOCK
	pps->capffth = fftimehands;
#endif
	pps->capcount = th->th_counter->tc_get_timecount(th->th_counter);
	atomic_thread_fence_acq();
	if (pps->capgen != th->th_generation)
		pps->capgen = 0;
}
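
/*
 * Editor's note: drivers are expected to call pps_capture() from the
 * interrupt path, where it only latches the counter, timehands and
 * generation, and to defer the heavier conversion work to pps_event().
 * If tc_windup() ran in between, the generation check in pps_event()
 * discards the sample rather than mix data from different timehands.
 */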

void
pps_event(struct pps_state *pps, int event)
{
	struct bintime bt;
	struct timespec ts, *tsp, *osp;
	u_int tcount, *pcount;
	int foff;
	pps_seq_t *pseq;
#ifdef FFCLOCK
	struct timespec *tsp_ffc;
	pps_seq_t *pseq_ffc;
	ffcounter *ffcount;
#endif
#ifdef PPS_SYNC
	int fhard;
#endif

	KASSERT(pps != NULL, ("NULL pps pointer in pps_event"));
	/* Nothing to do if not currently set to capture this event type. */
	if ((event & pps->ppsparam.mode) == 0)
		return;
	/* If the timecounter was wound up underneath us, bail out. */
	if (pps->capgen == 0 || pps->capgen !=
	    atomic_load_acq_int(&pps->capth->th_generation))
		return;

	/* Things would be easier with arrays. */
	if (event == PPS_CAPTUREASSERT) {
		tsp = &pps->ppsinfo.assert_timestamp;
		osp = &pps->ppsparam.assert_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
#ifdef PPS_SYNC
		fhard = pps->kcmode & PPS_CAPTUREASSERT;
#endif
		pcount = &pps->ppscount[0];
		pseq = &pps->ppsinfo.assert_sequence;
#ifdef FFCLOCK
		ffcount = &pps->ppsinfo_ffc.assert_ffcount;
		tsp_ffc = &pps->ppsinfo_ffc.assert_timestamp;
		pseq_ffc = &pps->ppsinfo_ffc.assert_sequence;
#endif
	} else {
		tsp = &pps->ppsinfo.clear_timestamp;
		osp = &pps->ppsparam.clear_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
#ifdef PPS_SYNC
		fhard = pps->kcmode & PPS_CAPTURECLEAR;
#endif
		pcount = &pps->ppscount[1];
		pseq = &pps->ppsinfo.clear_sequence;
#ifdef FFCLOCK
		ffcount = &pps->ppsinfo_ffc.clear_ffcount;
		tsp_ffc = &pps->ppsinfo_ffc.clear_timestamp;
		pseq_ffc = &pps->ppsinfo_ffc.clear_sequence;
#endif
	}

	/*
	 * If the timecounter changed, we cannot compare the count values, so
	 * we have to drop the rest of the PPS stuff until the next event.
	 */
	if (pps->ppstc != pps->capth->th_counter) {
		pps->ppstc = pps->capth->th_counter;
		*pcount = pps->capcount;
		pps->ppscount[2] = pps->capcount;
		return;
	}

	/* Convert the count to a timespec. */
	tcount = pps->capcount - pps->capth->th_offset_count;
	tcount &= pps->capth->th_counter->tc_counter_mask;
	bt = pps->capth->th_bintime;
	bintime_addx(&bt, pps->capth->th_scale * tcount);
	bintime2timespec(&bt, &ts);

	/* If the timecounter was wound up underneath us, bail out. */
	atomic_thread_fence_acq();
	if (pps->capgen != pps->capth->th_generation)
		return;

	*pcount = pps->capcount;
	(*pseq)++;
	*tsp = ts;

	if (foff) {
		timespecadd(tsp, osp, tsp);
		if (tsp->tv_nsec < 0) {
			tsp->tv_nsec += 1000000000;
			tsp->tv_sec -= 1;
		}
	}

#ifdef FFCLOCK
	*ffcount = pps->capffth->tick_ffcount + tcount;
	bt = pps->capffth->tick_time;
	ffclock_convert_delta(tcount, pps->capffth->cest.period, &bt);
	bintime_add(&bt, &pps->capffth->tick_time);
	bintime2timespec(&bt, &ts);
	(*pseq_ffc)++;
	*tsp_ffc = ts;
#endif

#ifdef PPS_SYNC
	if (fhard) {
		uint64_t scale;

		/*
		 * Feed the NTP PLL/FLL.
		 * The FLL wants to know how many (hardware) nanoseconds
		 * elapsed since the previous event.
		 */
		tcount = pps->capcount - pps->ppscount[2];
		pps->ppscount[2] = pps->capcount;
		tcount &= pps->capth->th_counter->tc_counter_mask;
		scale = (uint64_t)1 << 63;
		scale /= pps->capth->th_counter->tc_frequency;
		scale *= 2;
		bt.sec = 0;
		bt.frac = 0;
		bintime_addx(&bt, scale * tcount);
		bintime2timespec(&bt, &ts);
		hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
	}
#endif

	/* Wakeup anyone sleeping in pps_fetch(). */
	wakeup(pps);
}

/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing. Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 */

static int tc_tick;
SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tc_tick, 0,
    "Approximate number of hardclock ticks in a millisecond");

void
tc_ticktock(int cnt)
{
	static int count;

	if (mtx_trylock_spin(&tc_setclock_mtx)) {
		count += cnt;
		if (count >= tc_tick) {
			count = 0;
			tc_windup(NULL);
		}
		mtx_unlock_spin(&tc_setclock_mtx);
	}
}

static void __inline
tc_adjprecision(void)
{
	int t;

	if (tc_timepercentage > 0) {
		t = (99 + tc_timepercentage) / tc_timepercentage;
		tc_precexp = fls(t + (t >> 1)) - 1;
		FREQ2BT(hz / tc_tick, &bt_timethreshold);
		FREQ2BT(hz, &bt_tickthreshold);
		bintime_shift(&bt_timethreshold, tc_precexp);
		bintime_shift(&bt_tickthreshold, tc_precexp);
	} else {
		tc_precexp = 31;
		bt_timethreshold.sec = INT_MAX;
		bt_timethreshold.frac = ~(uint64_t)0;
		bt_tickthreshold = bt_timethreshold;
	}
	sbt_timethreshold = bttosbt(bt_timethreshold);
	sbt_tickthreshold = bttosbt(bt_tickthreshold);
}

static int
sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS)
{
	int error, val;

	val = tc_timepercentage;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	tc_timepercentage = val;
	if (cold)
		goto done;
	tc_adjprecision();
done:
	return (0);
}
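
/*
 * Editor's worked example for tc_adjprecision() (assuming the default
 * tc_timepercentage of 5): t = (99 + 5) / 5 = 20 and tc_precexp =
 * fls(20 + 10) - 1 = 4, so the thresholds are the corresponding tick
 * periods shifted left by 4; intervals longer than that are deemed to
 * tolerate the cheaper, less precise get*() clocks.
 */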

static int
sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS)
{
	int error, val;

	val = tc_timepercentage;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	tc_timepercentage = val;
	if (cold)
		goto done;
	tc_adjprecision();
done:
	return (0);
}

/* Set up the requested number of timehands. */
static void
inittimehands(void *dummy)
{
	struct timehands *thp;
	int i;

	TUNABLE_INT_FETCH("kern.timecounter.timehands_count",
	    &timehands_count);
	if (timehands_count < 1)
		timehands_count = 1;
	if (timehands_count > nitems(ths))
		timehands_count = nitems(ths);
	for (i = 1, thp = &ths[0]; i < timehands_count; thp = &ths[i++])
		thp->th_next = &ths[i];
	thp->th_next = &ths[0];

	TUNABLE_STR_FETCH("kern.timecounter.hardware", tc_from_tunable,
	    sizeof(tc_from_tunable));
}
SYSINIT(timehands, SI_SUB_TUNABLES, SI_ORDER_ANY, inittimehands, NULL);

static void
inittimecounter(void *dummy)
{
	u_int p;
	int tick_rate;

	/*
	 * Set the initial timeout to
	 * max(1, <approx. number of hardclock ticks in a millisecond>).
	 * People should probably not use the sysctl to set the timeout
	 * to smaller than its initial value, since that value is the
	 * smallest reasonable one.  If they want better timestamps they
	 * should use the non-"get"* functions.
	 */
	if (hz > 1000)
		tc_tick = (hz + 500) / 1000;
	else
		tc_tick = 1;
	tc_adjprecision();
	FREQ2BT(hz, &tick_bt);
	tick_sbt = bttosbt(tick_bt);
	tick_rate = hz / tc_tick;
	FREQ2BT(tick_rate, &tc_tick_bt);
	tc_tick_sbt = bttosbt(tc_tick_bt);
	p = (tc_tick * 1000000) / hz;
	printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);

#ifdef FFCLOCK
	ffclock_init();
#endif

	/* Warm up the new timecounter (again) and get rolling. */
	(void)timecounter->tc_get_timecount(timecounter);
	mtx_lock_spin(&tc_setclock_mtx);
	tc_windup(NULL);
	mtx_unlock_spin(&tc_setclock_mtx);
}

SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL);

/* Cpu tick handling -------------------------------------------------*/

static int cpu_tick_variable;
static uint64_t cpu_tick_frequency;

DPCPU_DEFINE_STATIC(uint64_t, tc_cpu_ticks_base);
DPCPU_DEFINE_STATIC(unsigned, tc_cpu_ticks_last);

static uint64_t
tc_cpu_ticks(void)
{
	struct timecounter *tc;
	uint64_t res, *base;
	unsigned u, *last;

	critical_enter();
	base = DPCPU_PTR(tc_cpu_ticks_base);
	last = DPCPU_PTR(tc_cpu_ticks_last);
	tc = timehands->th_counter;
	u = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
	if (u < *last)
		*base += (uint64_t)tc->tc_counter_mask + 1;
	*last = u;
	res = u + *base;
	critical_exit();
	return (res);
}

void
cpu_tick_calibration(void)
{
	static time_t last_calib;

	if (time_uptime != last_calib && !(time_uptime & 0xf)) {
		cpu_tick_calibrate(0);
		last_calib = time_uptime;
	}
}

/*
 * This function gets called every 16 seconds on only one designated
 * CPU in the system from hardclock() via cpu_tick_calibration().
 *
 * Whenever the real time clock is stepped we get called with reset=1
 * to make sure we handle suspend/resume and similar events correctly.
 */
static void
cpu_tick_calibrate(int reset)
{
	static uint64_t c_last;
	uint64_t c_this, c_delta;
	static struct bintime t_last;
	struct bintime t_this, t_delta;
	uint32_t divi;

	if (reset) {
		/* The clock was stepped, abort & reset. */
		t_last.sec = 0;
		return;
	}

	/* We don't calibrate fixed rate cputicks. */
	if (!cpu_tick_variable)
		return;

	getbinuptime(&t_this);
	c_this = cpu_ticks();
	if (t_last.sec != 0) {
		c_delta = c_this - c_last;
		t_delta = t_this;
		bintime_sub(&t_delta, &t_last);
		/*
		 * Headroom:
		 *	2^(64-20) / 16[s] =
		 *	2^(44) / 16[s] =
		 *	17,592,186,044,416 / 16 =
		 *	1,099,511,627,776 [Hz]
		 */
		divi = t_delta.sec << 20;
		divi |= t_delta.frac >> (64 - 20);
		c_delta <<= 20;
		c_delta /= divi;
		if (c_delta > cpu_tick_frequency) {
			if (0 && bootverbose)
				printf("cpu_tick increased to %ju Hz\n",
				    (uintmax_t)c_delta);
			cpu_tick_frequency = c_delta;
		}
	}
	c_last = c_this;
	t_last = t_this;
}
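
/*
 * Worked example (illustrative, with assumed numbers): over a nominal
 * 16 s interval a 2.4 GHz variable ticker advances c_delta by about
 * 3.84e10 ticks.  divi becomes 16 << 20 = 16,777,216 (the interval in
 * units of 2^-20 s), and (3.84e10 << 20) / 16,777,216 recovers
 * 2,400,000,000 Hz, which is recorded if it exceeds the previous
 * estimate.
 */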

void
set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var)
{

	if (func == NULL) {
		cpu_ticks = tc_cpu_ticks;
	} else {
		cpu_tick_frequency = freq;
		cpu_tick_variable = var;
		cpu_ticks = func;
	}
}

uint64_t
cpu_tickrate(void)
{

	if (cpu_ticks == tc_cpu_ticks)
		return (tc_getfrequency());
	return (cpu_tick_frequency);
}

/*
 * We need to be slightly careful converting cputicks to microseconds.
 * There is plenty of margin in 64 bits of microseconds (half a million
 * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply
 * before divide conversion (to retain precision) we find that the
 * margin shrinks to 1.5 hours (one millionth of 146y).
 * With a three-prong approach we never lose significant bits, no
 * matter what the cputick rate and the length of the time interval are.
 */
uint64_t
cputick2usec(uint64_t tick)
{

	if (tick > 18446744073709551LL)		/* floor(2^64 / 1000) */
		return (tick / (cpu_tickrate() / 1000000LL));
	else if (tick > 18446744073709LL)	/* floor(2^64 / 1000000) */
		return ((tick * 1000LL) / (cpu_tickrate() / 1000LL));
	else
		return ((tick * 1000000LL) / cpu_tickrate());
}

cpu_tick_f *cpu_ticks = tc_cpu_ticks;
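
/*
 * Worked example (illustrative): at cpu_tickrate() == 4 GHz, a tick
 * count of 1e15 (about 69 hours of uptime) is above floor(2^64 / 1e6)
 * but below floor(2^64 / 1e3), so the middle prong applies:
 * (1e15 * 1000) / (4e9 / 1000) = 1e18 / 4e6 = 2.5e11 usec, i.e.
 * 250,000 seconds, without overflowing the 64-bit intermediate.
 */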

static int vdso_th_enable = 1;
static int
sysctl_fast_gettime(SYSCTL_HANDLER_ARGS)
{
	int old_vdso_th_enable, error;

	old_vdso_th_enable = vdso_th_enable;
	error = sysctl_handle_int(oidp, &old_vdso_th_enable, 0, req);
	if (error != 0)
		return (error);
	vdso_th_enable = old_vdso_th_enable;
	return (0);
}
SYSCTL_PROC(_kern_timecounter, OID_AUTO, fast_gettime,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_fast_gettime, "I", "Enable fast time of day");

uint32_t
tc_fill_vdso_timehands(struct vdso_timehands *vdso_th)
{
	struct timehands *th;
	uint32_t enabled;

	th = timehands;
	vdso_th->th_scale = th->th_scale;
	vdso_th->th_offset_count = th->th_offset_count;
	vdso_th->th_counter_mask = th->th_counter->tc_counter_mask;
	vdso_th->th_offset = th->th_offset;
	vdso_th->th_boottime = th->th_boottime;
	if (th->th_counter->tc_fill_vdso_timehands != NULL) {
		enabled = th->th_counter->tc_fill_vdso_timehands(vdso_th,
		    th->th_counter);
	} else
		enabled = 0;
	if (!vdso_th_enable)
		enabled = 0;
	return (enabled);
}

#ifdef COMPAT_FREEBSD32
uint32_t
tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32)
{
	struct timehands *th;
	uint32_t enabled;

	th = timehands;
	*(uint64_t *)&vdso_th32->th_scale[0] = th->th_scale;
	vdso_th32->th_offset_count = th->th_offset_count;
	vdso_th32->th_counter_mask = th->th_counter->tc_counter_mask;
	vdso_th32->th_offset.sec = th->th_offset.sec;
	*(uint64_t *)&vdso_th32->th_offset.frac[0] = th->th_offset.frac;
	vdso_th32->th_boottime.sec = th->th_boottime.sec;
	*(uint64_t *)&vdso_th32->th_boottime.frac[0] = th->th_boottime.frac;
	if (th->th_counter->tc_fill_vdso_timehands32 != NULL) {
		enabled = th->th_counter->tc_fill_vdso_timehands32(vdso_th32,
		    th->th_counter);
	} else
		enabled = 0;
	if (!vdso_th_enable)
		enabled = 0;
	return (enabled);
}
#endif

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND(timecounter, db_show_timecounter)
{
	struct timehands *th;
	struct timecounter *tc;
	u_int val1, val2;

	th = timehands;
	tc = th->th_counter;
	val1 = tc->tc_get_timecount(tc);
	__compiler_membar();
	val2 = tc->tc_get_timecount(tc);

	db_printf("timecounter %p %s\n", tc, tc->tc_name);
	db_printf("  mask %#x freq %ju qual %d flags %#x priv %p\n",
	    tc->tc_counter_mask, (uintmax_t)tc->tc_frequency, tc->tc_quality,
	    tc->tc_flags, tc->tc_priv);
	db_printf("  val %#x %#x\n", val1, val2);
	db_printf("timehands adj %#jx scale %#jx ldelta %d off_cnt %d gen %d\n",
	    (uintmax_t)th->th_adjustment, (uintmax_t)th->th_scale,
	    th->th_large_delta, th->th_offset_count, th->th_generation);
	db_printf("  offset %jd %jd boottime %jd %jd\n",
	    (intmax_t)th->th_offset.sec, (uintmax_t)th->th_offset.frac,
	    (intmax_t)th->th_boottime.sec, (uintmax_t)th->th_boottime.frac);
}
#endif
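
#if 0
/*
 * Illustrative sketch, not part of the kernel: how a userland fast
 * gettime path can consume the exported vdso_timehands, mirroring the
 * in-kernel binuptime() computation.  The function name and its
 * standalone form are assumptions; real consumers also retry on
 * generation changes and read the counter through an arch-specific
 * fast path.
 */
static void
vdso_sample_binuptime(const struct vdso_timehands *th, u_int curcnt,
    struct bintime *bt)
{

	*bt = th->th_offset;
	/* Advance by scale * masked counter delta, as tc_windup() does. */
	bintime_addx(bt, th->th_scale *
	    ((curcnt - th->th_offset_count) & th->th_counter_mask));
}
#endif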