/*-
 * SPDX-License-Identifier: Beerware
 *
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 *
 * Copyright (c) 2011, 2015, 2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Julien Ridoux at the University
 * of Melbourne under sponsorship from the FreeBSD Foundation.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ntp.h"
#include "opt_ffclock.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sleepqueue.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/timeffc.h>
#include <sys/timepps.h>
#include <sys/timetc.h>
#include <sys/timex.h>
#include <sys/vdso.h>

/*
 * A large step happens on boot.  This constant detects such steps.
 * It is relatively small so that ntp_update_second gets called enough
 * in the typical 'missed a couple of seconds' case, but doesn't loop
 * forever when the time step is large.
 */
#define	LARGE_STEP	200

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

static u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	return (++now);
}

static struct timecounter dummy_timecounter = {
	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
};

struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;
	int64_t			th_adjustment;
	uint64_t		th_scale;
	u_int			th_large_delta;
	u_int			th_offset_count;
	struct bintime		th_offset;
	struct bintime		th_bintime;
	struct timeval		th_microtime;
	struct timespec		th_nanotime;
	struct bintime		th_boottime;
	/* Fields not to be copied in tc_windup start with th_generation. */
	u_int			th_generation;
	struct timehands	*th_next;
};

static struct timehands ths[16] = {
    [0] =  {
	.th_counter = &dummy_timecounter,
	.th_scale = (uint64_t)-1 / 1000000,
	.th_large_delta = 1000000,
	.th_offset = { .sec = 1 },
	.th_generation = 1,
    },
};

static struct timehands *volatile timehands = &ths[0];
struct timecounter *timecounter = &dummy_timecounter;
static struct timecounter *timecounters = &dummy_timecounter;

int tc_min_ticktock_freq = 1;

volatile time_t time_second = 1;
volatile time_t time_uptime = 1;

static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, CTLTYPE_STRUCT|CTLFLAG_RD,
    NULL, 0, sysctl_kern_boottime, "S,timeval", "System boottime");

SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");
static SYSCTL_NODE(_kern_timecounter, OID_AUTO, tc, CTLFLAG_RW, 0, "");

static int timestepwarnings;
SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
    &timestepwarnings, 0, "Log time steps");

static int timehands_count = 2;
SYSCTL_INT(_kern_timecounter, OID_AUTO, timehands_count,
    CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &timehands_count, 0, "Count of timehands in rotation");

struct bintime bt_timethreshold;
struct bintime bt_tickthreshold;
sbintime_t sbt_timethreshold;
sbintime_t sbt_tickthreshold;
struct bintime tc_tick_bt;
sbintime_t tc_tick_sbt;
int tc_precexp;
int tc_timepercentage = TC_DEFAULTPERC;
static int sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_kern_timecounter, OID_AUTO, alloweddeviation,
    CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
    sysctl_kern_timecounter_adjprecision, "I",
    "Allowed time interval deviation in percents");

volatile int rtc_generation = 1;

static int tc_chosen;	/* Non-zero if a specific tc was chosen via sysctl. */

static void tc_windup(struct bintime *new_boottimebin);
static void cpu_tick_calibrate(int);

void dtrace_getnanotime(struct timespec *tsp);

static int
sysctl_kern_boottime(SYSCTL_HANDLER_ARGS)
{
	struct timeval boottime;

	getboottime(&boottime);

/* i386 is the only arch which uses a 32bits time_t */
#ifdef	__amd64__
#ifdef SCTL_MASK32
	int tv[2];

	if (req->flags & SCTL_MASK32) {
		tv[0] = boottime.tv_sec;
		tv[1] = boottime.tv_usec;
		return (SYSCTL_OUT(req, tv, sizeof(tv)));
	}
#endif
#endif
	return (SYSCTL_OUT(req, &boottime, sizeof(boottime)));
}

static int
sysctl_kern_timecounter_get(SYSCTL_HANDLER_ARGS)
{
	u_int ncount;
	struct timecounter *tc = arg1;

	ncount = tc->tc_get_timecount(tc);
	return (sysctl_handle_int(oidp, &ncount, 0, req));
}

static int
sysctl_kern_timecounter_freq(SYSCTL_HANDLER_ARGS)
{
	uint64_t freq;
	struct timecounter *tc = arg1;

	freq = tc->tc_frequency;
	return (sysctl_handle_64(oidp, &freq, 0, req));
}

/*
 * Return the difference between the timehands' counter value now and what
 * was when we copied it to the timehands' offset_count.
 */
static __inline u_int
tc_delta(struct timehands *th)
{
	struct timecounter *tc;

	tc = th->th_counter;
	return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
	    tc->tc_counter_mask);
}

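/*
 * Illustrative note (added, not part of the original source): the unsigned
 * subtraction above, masked with tc_counter_mask, keeps tc_delta() correct
 * even when the hardware counter wraps between tc_windup() calls.  For
 * example, with a 16-bit counter (tc_counter_mask == 0xffff), a
 * th_offset_count of 0xfff0 and a current reading of 0x0010:
 *
 *	(0x0010 - 0xfff0) & 0xffff == 0x0020
 *
 * i.e. 32 ticks elapsed across the wrap, as expected.
 */
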
/*
 * Functions for reading the time.  We have to loop until we are sure that
 * the timehands that we operated on was not updated under our feet.  See
 * the comment in <sys/time.h> for a description of these 12 functions.
 */

static __inline void
bintime_off(struct bintime *bt, u_int off)
{
	struct timehands *th;
	struct bintime *btp;
	uint64_t scale, x;
	u_int delta, gen, large_delta;

	do {
		th = timehands;
		gen = atomic_load_acq_int(&th->th_generation);
		btp = (struct bintime *)((vm_offset_t)th + off);
		*bt = *btp;
		scale = th->th_scale;
		delta = tc_delta(th);
		large_delta = th->th_large_delta;
		atomic_thread_fence_acq();
	} while (gen == 0 || gen != th->th_generation);

	if (__predict_false(delta >= large_delta)) {
		/* Avoid overflow for scale * delta. */
		x = (scale >> 32) * delta;
		bt->sec += x >> 32;
		bintime_addx(bt, x << 32);
		bintime_addx(bt, (scale & 0xffffffff) * delta);
	} else {
		bintime_addx(bt, scale * delta);
	}
}
#define	GETTHBINTIME(dst, member)					\
do {									\
	_Static_assert(_Generic(((struct timehands *)NULL)->member,	\
	    struct bintime: 1, default: 0) == 1,			\
	    "struct timehands member is not of struct bintime type");	\
	bintime_off(dst, __offsetof(struct timehands, member));		\
} while (0)

static __inline void
getthmember(void *out, size_t out_size, u_int off)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = atomic_load_acq_int(&th->th_generation);
		memcpy(out, (char *)th + off, out_size);
		atomic_thread_fence_acq();
	} while (gen == 0 || gen != th->th_generation);
}
#define	GETTHMEMBER(dst, member)					\
do {									\
	_Static_assert(_Generic(*dst,					\
	    __typeof(((struct timehands *)NULL)->member): 1,		\
	    default: 0) == 1,						\
	    "*dst and struct timehands member have different types");	\
	getthmember(dst, sizeof(*dst), __offsetof(struct timehands,	\
	    member));							\
} while (0)

#ifdef FFCLOCK
void
fbclock_binuptime(struct bintime *bt)
{

	GETTHBINTIME(bt, th_offset);
}

void
fbclock_nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	fbclock_binuptime(&bt);
	bintime2timespec(&bt, tsp);
}

void
fbclock_microuptime(struct timeval *tvp)
{
	struct bintime bt;

	fbclock_binuptime(&bt);
	bintime2timeval(&bt, tvp);
}

void
fbclock_bintime(struct bintime *bt)
{

	GETTHBINTIME(bt, th_bintime);
}

void
fbclock_nanotime(struct timespec *tsp)
{
	struct bintime bt;

	fbclock_bintime(&bt);
	bintime2timespec(&bt, tsp);
}

void
fbclock_microtime(struct timeval *tvp)
{
	struct bintime bt;

	fbclock_bintime(&bt);
	bintime2timeval(&bt, tvp);
}

void
fbclock_getbinuptime(struct bintime *bt)
{

	GETTHMEMBER(bt, th_offset);
}

void
fbclock_getnanouptime(struct timespec *tsp)
{
	struct bintime bt;

	GETTHMEMBER(&bt, th_offset);
	bintime2timespec(&bt, tsp);
}

void
fbclock_getmicrouptime(struct timeval *tvp)
{
	struct bintime bt;

	GETTHMEMBER(&bt, th_offset);
	bintime2timeval(&bt, tvp);
}

void
fbclock_getbintime(struct bintime *bt)
{

	GETTHMEMBER(bt, th_bintime);
}

void
fbclock_getnanotime(struct timespec *tsp)
{

	GETTHMEMBER(tsp, th_nanotime);
}

void
fbclock_getmicrotime(struct timeval *tvp)
{

	GETTHMEMBER(tvp, th_microtime);
}
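
/*
 * Usage note (added for clarity, not in the original): the fbclock_get*()
 * variants above return the values cached at the last tc_windup() call, so
 * they cost only a lockless copy, while fbclock_binuptime() and friends also
 * read the hardware timecounter for full precision.  With the default
 * kern.timecounter.tick setting the cached values are refreshed roughly once
 * per millisecond, which bounds the staleness of the get*() family.
 */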
#else /* !FFCLOCK */

void
binuptime(struct bintime *bt)
{

	GETTHBINTIME(bt, th_offset);
}

void
nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	binuptime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct bintime bt;

	binuptime(&bt);
	bintime2timeval(&bt, tvp);
}

void
bintime(struct bintime *bt)
{

	GETTHBINTIME(bt, th_bintime);
}

void
nanotime(struct timespec *tsp)
{
	struct bintime bt;

	bintime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microtime(struct timeval *tvp)
{
	struct bintime bt;

	bintime(&bt);
	bintime2timeval(&bt, tvp);
}

void
getbinuptime(struct bintime *bt)
{

	GETTHMEMBER(bt, th_offset);
}

void
getnanouptime(struct timespec *tsp)
{
	struct bintime bt;

	GETTHMEMBER(&bt, th_offset);
	bintime2timespec(&bt, tsp);
}

void
getmicrouptime(struct timeval *tvp)
{
	struct bintime bt;

	GETTHMEMBER(&bt, th_offset);
	bintime2timeval(&bt, tvp);
}

void
getbintime(struct bintime *bt)
{

	GETTHMEMBER(bt, th_bintime);
}

void
getnanotime(struct timespec *tsp)
{

	GETTHMEMBER(tsp, th_nanotime);
}

void
getmicrotime(struct timeval *tvp)
{

	GETTHMEMBER(tvp, th_microtime);
}
#endif /* FFCLOCK */

void
getboottime(struct timeval *boottime)
{
	struct bintime boottimebin;

	getboottimebin(&boottimebin);
	bintime2timeval(&boottimebin, boottime);
}

void
getboottimebin(struct bintime *boottimebin)
{

	GETTHMEMBER(boottimebin, th_boottime);
}

#ifdef FFCLOCK
/*
 * Support for feed-forward synchronization algorithms.  This is heavily
 * inspired by the timehands mechanism but kept independent from it.
 * *_windup() functions have some connection to avoid accessing the
 * timecounter hardware more than necessary.
 */

/* Feed-forward clock estimates kept updated by the synchronization daemon. */
struct ffclock_estimate ffclock_estimate;
struct bintime ffclock_boottime;	/* Feed-forward boot time estimate. */
uint32_t ffclock_status;		/* Feed-forward clock status. */
int8_t ffclock_updated;			/* New estimates are available. */
struct mtx ffclock_mtx;			/* Mutex on ffclock_estimate. */

struct fftimehands {
	struct ffclock_estimate	cest;
	struct bintime		tick_time;
	struct bintime		tick_time_lerp;
	ffcounter		tick_ffcount;
	uint64_t		period_lerp;
	volatile uint8_t	gen;
	struct fftimehands	*next;
};

#define	NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x))

static struct fftimehands ffth[10];
static struct fftimehands *volatile fftimehands = ffth;

static void
ffclock_init(void)
{
	struct fftimehands *cur;
	struct fftimehands *last;

	memset(ffth, 0, sizeof(ffth));

	last = ffth + NUM_ELEMENTS(ffth) - 1;
	for (cur = ffth; cur < last; cur++)
		cur->next = cur + 1;
	last->next = ffth;

	ffclock_updated = 0;
	ffclock_status = FFCLOCK_STA_UNSYNC;
	mtx_init(&ffclock_mtx, "ffclock lock", NULL, MTX_DEF);
}

/*
 * Reset the feed-forward clock estimates.  Called from inittodr() to get
 * things kick started and uses the timecounter nominal frequency as a first
 * period estimate.  Note: this function may be called several times just
 * after boot.  Note: this is the only function that sets the value of boot
 * time for the monotonic (i.e. uptime) version of the feed-forward clock.
 */
void
ffclock_reset_clock(struct timespec *ts)
{
	struct timecounter *tc;
	struct ffclock_estimate cest;

	tc = timehands->th_counter;
	memset(&cest, 0, sizeof(struct ffclock_estimate));

	timespec2bintime(ts, &ffclock_boottime);
	timespec2bintime(ts, &(cest.update_time));
	ffclock_read_counter(&cest.update_ffcount);
	cest.leapsec_next = 0;
	cest.period = ((1ULL << 63) / tc->tc_frequency) << 1;
	cest.errb_abs = 0;
	cest.errb_rate = 0;
	cest.status = FFCLOCK_STA_UNSYNC;
	cest.leapsec_total = 0;
	cest.leapsec = 0;

	mtx_lock(&ffclock_mtx);
	bcopy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate));
	ffclock_updated = INT8_MAX;
	mtx_unlock(&ffclock_mtx);

	printf("ffclock reset: %s (%llu Hz), time = %ld.%09lu\n", tc->tc_name,
	    (unsigned long long)tc->tc_frequency, (long)ts->tv_sec,
	    (unsigned long)ts->tv_nsec);
}

/*
 * Sub-routine to convert a time interval measured in RAW counter units to
 * time in seconds stored in bintime format.
 * NOTE: bintime_mul requires u_int, but the value of the ffcounter may be
 * larger than the max value of u_int (on 32 bit architecture).  Loop to
 * consume extra cycles.
 */
static void
ffclock_convert_delta(ffcounter ffdelta, uint64_t period, struct bintime *bt)
{
	struct bintime bt2;
	ffcounter delta, delta_max;

	delta_max = (1ULL << (8 * sizeof(unsigned int))) - 1;
	bintime_clear(bt);
	do {
		if (ffdelta > delta_max)
			delta = delta_max;
		else
			delta = ffdelta;
		bt2.sec = 0;
		bt2.frac = period;
		bintime_mul(&bt2, (unsigned int)delta);
		bintime_add(bt, &bt2);
		ffdelta -= delta;
	} while (ffdelta > 0);
}

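/*
 * Worked example (added for illustration, not in the original): the 'period'
 * argument is the duration of one counter tick expressed as a 64-bit binary
 * fraction of a second.  For a 1 MHz counter,
 * period = 2^64 / 10^6 ~= 18446744073709 (see ffclock_reset_clock() above),
 * so a delta of 2500 ticks yields bt->frac ~= 2500 * 18446744073709, which
 * is 2.5 ms; the loop is only needed when the delta does not fit in a u_int.
 */
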
/*
 * Update the fftimehands.
 * Push the tick ffcount and time(s) forward based on current clock estimate.
 * The conversion from ffcounter to bintime relies on the difference clock
 * principle, whose accuracy relies on computing small time intervals.  If a
 * new clock estimate has been passed by the synchronisation daemon, make it
 * current, and compute the linear interpolation for monotonic time if needed.
 */
static void
ffclock_windup(unsigned int delta)
{
	struct ffclock_estimate *cest;
	struct fftimehands *ffth;
	struct bintime bt, gap_lerp;
	ffcounter ffdelta;
	uint64_t frac;
	unsigned int polling;
	uint8_t forward_jump, ogen;

	/*
	 * Pick the next timehand, copy current ffclock estimates and move
	 * tick times and counter forward.
	 */
	forward_jump = 0;
	ffth = fftimehands->next;
	ogen = ffth->gen;
	ffth->gen = 0;
	cest = &ffth->cest;
	bcopy(&fftimehands->cest, cest, sizeof(struct ffclock_estimate));
	ffdelta = (ffcounter)delta;
	ffth->period_lerp = fftimehands->period_lerp;

	ffth->tick_time = fftimehands->tick_time;
	ffclock_convert_delta(ffdelta, cest->period, &bt);
	bintime_add(&ffth->tick_time, &bt);

	ffth->tick_time_lerp = fftimehands->tick_time_lerp;
	ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt);
	bintime_add(&ffth->tick_time_lerp, &bt);

	ffth->tick_ffcount = fftimehands->tick_ffcount + ffdelta;

	/*
	 * Assess the status of the clock, if the last update is too old, it is
	 * likely the synchronisation daemon is dead and the clock is free
	 * running.
	 */
	if (ffclock_updated == 0) {
		ffdelta = ffth->tick_ffcount - cest->update_ffcount;
		ffclock_convert_delta(ffdelta, cest->period, &bt);
		if (bt.sec > 2 * FFCLOCK_SKM_SCALE)
			ffclock_status |= FFCLOCK_STA_UNSYNC;
	}

	/*
	 * If available, grab updated clock estimates and make them current.
	 * Recompute time at this tick using the updated estimates.  The clock
	 * estimates passed in by the feed-forward synchronisation daemon may
	 * result in time conversion that is not monotonically increasing
	 * (just after the update).  time_lerp is a particular linear
	 * interpolation over the synchronisation algo polling period that
	 * ensures monotonicity for the clock ids requesting it.
	 */
	if (ffclock_updated > 0) {
		bcopy(&ffclock_estimate, cest, sizeof(struct ffclock_estimate));
		ffdelta = ffth->tick_ffcount - cest->update_ffcount;
		ffth->tick_time = cest->update_time;
		ffclock_convert_delta(ffdelta, cest->period, &bt);
		bintime_add(&ffth->tick_time, &bt);

		/* ffclock_reset sets ffclock_updated to INT8_MAX */
		if (ffclock_updated == INT8_MAX)
			ffth->tick_time_lerp = ffth->tick_time;

		if (bintime_cmp(&ffth->tick_time, &ffth->tick_time_lerp, >))
			forward_jump = 1;
		else
			forward_jump = 0;

		bintime_clear(&gap_lerp);
		if (forward_jump) {
			gap_lerp = ffth->tick_time;
			bintime_sub(&gap_lerp, &ffth->tick_time_lerp);
		} else {
			gap_lerp = ffth->tick_time_lerp;
			bintime_sub(&gap_lerp, &ffth->tick_time);
		}

		/*
		 * The reset from the RTC clock may be far from accurate, and
		 * reducing the gap between real time and interpolated time
		 * could take a very long time if the interpolated clock
		 * insists on strict monotonicity.  The clock is reset under
		 * very strict conditions (kernel time is known to be wrong
		 * and the synchronization daemon has been restarted
		 * recently).  ffclock_boottime absorbs the jump to ensure
		 * boot time is correct and uptime functions stay consistent.
		 */
		if (((ffclock_status & FFCLOCK_STA_UNSYNC) == FFCLOCK_STA_UNSYNC) &&
		    ((cest->status & FFCLOCK_STA_UNSYNC) == 0) &&
		    ((cest->status & FFCLOCK_STA_WARMUP) == FFCLOCK_STA_WARMUP)) {
			if (forward_jump)
				bintime_add(&ffclock_boottime, &gap_lerp);
			else
				bintime_sub(&ffclock_boottime, &gap_lerp);
			ffth->tick_time_lerp = ffth->tick_time;
			bintime_clear(&gap_lerp);
		}

		ffclock_status = cest->status;
		ffth->period_lerp = cest->period;

		/*
		 * Compute corrected period used for the linear interpolation
		 * of time.  The rate of linear interpolation is capped to
		 * 5000PPM (5ms/s).
		 */
		if (bintime_isset(&gap_lerp)) {
			ffdelta = cest->update_ffcount;
			ffdelta -= fftimehands->cest.update_ffcount;
			ffclock_convert_delta(ffdelta, cest->period, &bt);
			polling = bt.sec;
			bt.sec = 0;
			bt.frac = 5000000 * (uint64_t)18446744073LL;
			bintime_mul(&bt, polling);
			if (bintime_cmp(&gap_lerp, &bt, >))
				gap_lerp = bt;

			/* Approximate 1 sec by 1-(1/2^64) to ease arithmetic */
			frac = 0;
			if (gap_lerp.sec > 0) {
				frac -= 1;
				frac /= ffdelta / gap_lerp.sec;
			}
			frac += gap_lerp.frac / ffdelta;

			if (forward_jump)
				ffth->period_lerp += frac;
			else
				ffth->period_lerp -= frac;
		}

		ffclock_updated = 0;
	}
	if (++ogen == 0)
		ogen = 1;
	ffth->gen = ogen;
	fftimehands = ffth;
}

/*
 * Adjust the fftimehands when the timecounter is changed.  Stating the
 * obvious, the old and new hardware counter cannot be read simultaneously.
 * tc_windup() does read the two counters 'back to back', but a few cycles
 * are effectively lost, and not accumulated in tick_ffcount.  This is a
 * fairly radical operation for a feed-forward synchronization daemon, and it
 * is its job not to push irrelevant data to the kernel.  Because there is no
 * locking here, simply force the pending or next update to be ignored, to
 * give the daemon a chance to realize the counter has changed.
 */
static void
ffclock_change_tc(struct timehands *th)
{
	struct fftimehands *ffth;
	struct ffclock_estimate *cest;
	struct timecounter *tc;
	uint8_t ogen;

	tc = th->th_counter;
	ffth = fftimehands->next;
	ogen = ffth->gen;
	ffth->gen = 0;

	cest = &ffth->cest;
	bcopy(&(fftimehands->cest), cest, sizeof(struct ffclock_estimate));
	cest->period = ((1ULL << 63) / tc->tc_frequency) << 1;
	cest->errb_abs = 0;
	cest->errb_rate = 0;
	cest->status |= FFCLOCK_STA_UNSYNC;

	ffth->tick_ffcount = fftimehands->tick_ffcount;
	ffth->tick_time_lerp = fftimehands->tick_time_lerp;
	ffth->tick_time = fftimehands->tick_time;
	ffth->period_lerp = cest->period;

	/* Do not lock but ignore next update from synchronization daemon. */
	ffclock_updated--;

	if (++ogen == 0)
		ogen = 1;
	ffth->gen = ogen;
	fftimehands = ffth;
}

/*
 * Retrieve feed-forward counter and time of last kernel tick.
 */
void
ffclock_last_tick(ffcounter *ffcount, struct bintime *bt, uint32_t flags)
{
	struct fftimehands *ffth;
	uint8_t gen;

	/*
	 * No locking but check generation has not changed.  Also need to make
	 * sure ffdelta is positive, i.e. ffcount > tick_ffcount.
	 */
	do {
		ffth = fftimehands;
		gen = ffth->gen;
		if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP)
			*bt = ffth->tick_time_lerp;
		else
			*bt = ffth->tick_time;
		*ffcount = ffth->tick_ffcount;
	} while (gen == 0 || gen != ffth->gen);
}

/*
 * Absolute clock conversion.  Low level function to convert ffcounter to
 * bintime.  The ffcounter is converted using the current ffclock period
 * estimate or the "interpolated period" to ensure monotonicity.
 * NOTE: this conversion may have been deferred, and the clock updated since
 * the hardware counter has been read.
 */
void
ffclock_convert_abs(ffcounter ffcount, struct bintime *bt, uint32_t flags)
{
	struct fftimehands *ffth;
	struct bintime bt2;
	ffcounter ffdelta;
	uint8_t gen;

	/*
	 * No locking but check generation has not changed.  Also need to make
	 * sure ffdelta is positive, i.e. ffcount > tick_ffcount.
	 */
	do {
		ffth = fftimehands;
		gen = ffth->gen;
		if (ffcount > ffth->tick_ffcount)
			ffdelta = ffcount - ffth->tick_ffcount;
		else
			ffdelta = ffth->tick_ffcount - ffcount;

		if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP) {
			*bt = ffth->tick_time_lerp;
			ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt2);
		} else {
			*bt = ffth->tick_time;
			ffclock_convert_delta(ffdelta, ffth->cest.period, &bt2);
		}

		if (ffcount > ffth->tick_ffcount)
			bintime_add(bt, &bt2);
		else
			bintime_sub(bt, &bt2);
	} while (gen == 0 || gen != ffth->gen);
}

/*
 * Difference clock conversion.
 * Low level function to convert a time interval measured in RAW counter
 * units into bintime.  The difference clock allows measuring small intervals
 * much more reliably than the absolute clock.
 */
void
ffclock_convert_diff(ffcounter ffdelta, struct bintime *bt)
{
	struct fftimehands *ffth;
	uint8_t gen;

	/* No locking but check generation has not changed. */
	do {
		ffth = fftimehands;
		gen = ffth->gen;
		ffclock_convert_delta(ffdelta, ffth->cest.period, bt);
	} while (gen == 0 || gen != ffth->gen);
}

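/*
 * Illustrative sketch (added, not part of the original file): measuring a
 * short interval with the difference clock.  The calls below exist in this
 * file; the wrapper function itself is hypothetical.
 */
#if 0
static void
example_measure_interval(struct bintime *elapsed)
{
	ffcounter start, end;

	ffclock_read_counter(&start);
	/* ... the event being timed ... */
	ffclock_read_counter(&end);
	ffclock_convert_diff(end - start, elapsed);
}
#endif
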
/*
 * Access to current ffcounter value.
 */
void
ffclock_read_counter(ffcounter *ffcount)
{
	struct timehands *th;
	struct fftimehands *ffth;
	unsigned int gen, delta;

	/*
	 * ffclock_windup() is called from tc_windup(), so it is safe to rely
	 * on th->th_generation only, for a correct delta and ffcounter.
	 */
	do {
		th = timehands;
		gen = atomic_load_acq_int(&th->th_generation);
		ffth = fftimehands;
		delta = tc_delta(th);
		*ffcount = ffth->tick_ffcount;
		atomic_thread_fence_acq();
	} while (gen == 0 || gen != th->th_generation);

	*ffcount += delta;
}

void
binuptime(struct bintime *bt)
{

	binuptime_fromclock(bt, sysclock_active);
}

void
nanouptime(struct timespec *tsp)
{

	nanouptime_fromclock(tsp, sysclock_active);
}

void
microuptime(struct timeval *tvp)
{

	microuptime_fromclock(tvp, sysclock_active);
}

void
bintime(struct bintime *bt)
{

	bintime_fromclock(bt, sysclock_active);
}

void
nanotime(struct timespec *tsp)
{

	nanotime_fromclock(tsp, sysclock_active);
}

void
microtime(struct timeval *tvp)
{

	microtime_fromclock(tvp, sysclock_active);
}

void
getbinuptime(struct bintime *bt)
{

	getbinuptime_fromclock(bt, sysclock_active);
}

void
getnanouptime(struct timespec *tsp)
{

	getnanouptime_fromclock(tsp, sysclock_active);
}

void
getmicrouptime(struct timeval *tvp)
{

	getmicrouptime_fromclock(tvp, sysclock_active);
}

void
getbintime(struct bintime *bt)
{

	getbintime_fromclock(bt, sysclock_active);
}

void
getnanotime(struct timespec *tsp)
{

	getnanotime_fromclock(tsp, sysclock_active);
}

void
getmicrotime(struct timeval *tvp)
{

	getmicrotime_fromclock(tvp, sysclock_active);
}

#endif /* FFCLOCK */

/*
 * This is a clone of getnanotime and used for walltimestamps.
 * The dtrace_ prefix prevents fbt from creating probes for
 * it so walltimestamp can be safely used in all fbt probes.
 */
void
dtrace_getnanotime(struct timespec *tsp)
{

	GETTHMEMBER(tsp, th_nanotime);
}

/*
 * System clock currently providing time to the system.  Modifiable via sysctl
 * when the FFCLOCK option is defined.
 */
int sysclock_active = SYSCLOCK_FBCK;

/* Internal NTP status and error estimates. */
extern int time_status;
extern long time_esterror;

/*
 * Take a snapshot of sysclock data which can be used to compare system clocks
 * and generate timestamps after the fact.
 */
void
sysclock_getsnapshot(struct sysclock_snap *clock_snap, int fast)
{
	struct fbclock_info *fbi;
	struct timehands *th;
	struct bintime bt;
	unsigned int delta, gen;
#ifdef FFCLOCK
	ffcounter ffcount;
	struct fftimehands *ffth;
	struct ffclock_info *ffi;
	struct ffclock_estimate cest;

	ffi = &clock_snap->ff_info;
#endif

	fbi = &clock_snap->fb_info;
	delta = 0;

	do {
		th = timehands;
		gen = atomic_load_acq_int(&th->th_generation);
		fbi->th_scale = th->th_scale;
		fbi->tick_time = th->th_offset;
#ifdef FFCLOCK
		ffth = fftimehands;
		ffi->tick_time = ffth->tick_time_lerp;
		ffi->tick_time_lerp = ffth->tick_time_lerp;
		ffi->period = ffth->cest.period;
		ffi->period_lerp = ffth->period_lerp;
		clock_snap->ffcount = ffth->tick_ffcount;
		cest = ffth->cest;
#endif
		if (!fast)
			delta = tc_delta(th);
		atomic_thread_fence_acq();
	} while (gen == 0 || gen != th->th_generation);

	clock_snap->delta = delta;
	clock_snap->sysclock_active = sysclock_active;

	/* Record feedback clock status and error. */
	clock_snap->fb_info.status = time_status;
	/* XXX: Very crude estimate of feedback clock error. */
	bt.sec = time_esterror / 1000000;
	bt.frac = ((time_esterror - bt.sec) * 1000000) *
	    (uint64_t)18446744073709ULL;
	clock_snap->fb_info.error = bt;

#ifdef FFCLOCK
	if (!fast)
		clock_snap->ffcount += delta;

	/* Record feed-forward clock leap second adjustment. */
	ffi->leapsec_adjustment = cest.leapsec_total;
	if (clock_snap->ffcount > cest.leapsec_next)
		ffi->leapsec_adjustment -= cest.leapsec;

	/* Record feed-forward clock status and error. */
	clock_snap->ff_info.status = cest.status;
	ffcount = clock_snap->ffcount - cest.update_ffcount;
	ffclock_convert_delta(ffcount, cest.period, &bt);
	/* 18446744073709 = int(2^64/1e12), err_bound_rate in [ps/s]. */
	bintime_mul(&bt, cest.errb_rate * (uint64_t)18446744073709ULL);
	/* 18446744073 = int(2^64 / 1e9), since err_abs in [ns]. */
	bintime_addx(&bt, cest.errb_abs * (uint64_t)18446744073ULL);
	clock_snap->ff_info.error = bt;
#endif
}

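/*
 * Illustrative sketch (added, not part of the original file): a typical
 * consumer takes one snapshot and later renders it against either clock.
 * The functions called exist in this file; the wrapper is hypothetical.
 */
#if 0
static int
example_stamp_both(struct bintime *fb_bt, struct bintime *ff_bt)
{
	struct sysclock_snap snap;
	int error;

	sysclock_getsnapshot(&snap, 0);	/* !fast: read the hardware counter */
	error = sysclock_snap2bintime(&snap, fb_bt, SYSCLOCK_FBCK, 0);
	if (error == 0)
		error = sysclock_snap2bintime(&snap, ff_bt, SYSCLOCK_FFWD,
		    FFCLOCK_LERP);
	return (error);
}
#endif
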
/*
 * Convert a sysclock snapshot into a struct bintime based on the specified
 * clock source and flags.
 */
int
sysclock_snap2bintime(struct sysclock_snap *cs, struct bintime *bt,
    int whichclock, uint32_t flags)
{
	struct bintime boottimebin;
#ifdef FFCLOCK
	struct bintime bt2;
	uint64_t period;
#endif

	switch (whichclock) {
	case SYSCLOCK_FBCK:
		*bt = cs->fb_info.tick_time;

		/* If snapshot was created with !fast, delta will be >0. */
		if (cs->delta > 0)
			bintime_addx(bt, cs->fb_info.th_scale * cs->delta);

		if ((flags & FBCLOCK_UPTIME) == 0) {
			getboottimebin(&boottimebin);
			bintime_add(bt, &boottimebin);
		}
		break;
#ifdef FFCLOCK
	case SYSCLOCK_FFWD:
		if (flags & FFCLOCK_LERP) {
			*bt = cs->ff_info.tick_time_lerp;
			period = cs->ff_info.period_lerp;
		} else {
			*bt = cs->ff_info.tick_time;
			period = cs->ff_info.period;
		}

		/* If snapshot was created with !fast, delta will be >0. */
		if (cs->delta > 0) {
			ffclock_convert_delta(cs->delta, period, &bt2);
			bintime_add(bt, &bt2);
		}

		/* Leap second adjustment. */
		if (flags & FFCLOCK_LEAPSEC)
			bt->sec -= cs->ff_info.leapsec_adjustment;

		/* Boot time adjustment, for uptime/monotonic clocks. */
		if (flags & FFCLOCK_UPTIME)
			bintime_sub(bt, &ffclock_boottime);
		break;
#endif
	default:
		return (EINVAL);
		break;
	}

	return (0);
}

/*
 * Initialize a new timecounter and possibly use it.
 */
void
tc_init(struct timecounter *tc)
{
	u_int u;
	struct sysctl_oid *tc_root;

	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (u > hz && tc->tc_quality >= 0) {
		tc->tc_quality = -2000;
		if (bootverbose) {
			printf("Timecounter \"%s\" frequency %ju Hz",
			    tc->tc_name, (uintmax_t)tc->tc_frequency);
			printf(" -- Insufficient hz, needs at least %u\n", u);
		}
	} else if (tc->tc_quality >= 0 || bootverbose) {
		printf("Timecounter \"%s\" frequency %ju Hz quality %d\n",
		    tc->tc_name, (uintmax_t)tc->tc_frequency,
		    tc->tc_quality);
	}

	tc->tc_next = timecounters;
	timecounters = tc;
	/*
	 * Set up sysctl tree for this counter.
	 */
	tc_root = SYSCTL_ADD_NODE_WITH_LABEL(NULL,
	    SYSCTL_STATIC_CHILDREN(_kern_timecounter_tc), OID_AUTO, tc->tc_name,
	    CTLFLAG_RW, 0, "timecounter description", "timecounter");
	SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
	    "mask", CTLFLAG_RD, &(tc->tc_counter_mask), 0,
	    "mask for implemented bits");
	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
	    "counter", CTLTYPE_UINT | CTLFLAG_RD, tc, sizeof(*tc),
	    sysctl_kern_timecounter_get, "IU", "current timecounter value");
	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
	    "frequency", CTLTYPE_U64 | CTLFLAG_RD, tc, sizeof(*tc),
	    sysctl_kern_timecounter_freq, "QU", "timecounter frequency");
	SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
	    "quality", CTLFLAG_RD, &(tc->tc_quality), 0,
	    "goodness of time counter");
	/*
	 * Do not automatically switch if the current tc was specifically
	 * chosen.  Never automatically use a timecounter with negative
	 * quality.  Even though we run on the dummy counter, switching here
	 * may be worse since this timecounter may not be monotonic.
	 */
	if (tc_chosen)
		return;
	if (tc->tc_quality < 0)
		return;
	if (tc->tc_quality < timecounter->tc_quality)
		return;
	if (tc->tc_quality == timecounter->tc_quality &&
	    tc->tc_frequency < timecounter->tc_frequency)
		return;
	(void)tc->tc_get_timecount(tc);
	(void)tc->tc_get_timecount(tc);
	timecounter = tc;
}

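/*
 * Illustrative sketch (added, not part of the original file): a hardware
 * driver typically registers its counter as shown below; the foo_* names
 * and the register-read macro are hypothetical.
 */
#if 0
static u_int
foo_get_timecount(struct timecounter *tc)
{

	return (FOO_READ_COUNTER());
}

static struct timecounter foo_timecounter = {
	.tc_get_timecount = foo_get_timecount,
	.tc_counter_mask = 0xffffffffu,
	.tc_frequency = 24000000,	/* 24 MHz free-running counter */
	.tc_name = "foo",
	.tc_quality = 800,
};

static void
foo_attach(void)
{

	tc_init(&foo_timecounter);
}
#endif
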
/* Report the frequency of the current timecounter. */
uint64_t
tc_getfrequency(void)
{

	return (timehands->th_counter->tc_frequency);
}

static bool
sleeping_on_old_rtc(struct thread *td)
{

	/*
	 * td_rtcgen is modified by curthread when it is running,
	 * and by other threads in this function.  By finding the thread
	 * on a sleepqueue and holding the lock on the sleepqueue
	 * chain, we guarantee that the thread is not running and that
	 * modifying td_rtcgen is safe.  Setting td_rtcgen to zero informs
	 * the thread that it was woken due to a real-time clock adjustment.
	 * (The declaration of td_rtcgen refers to this comment.)
	 */
	if (td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation) {
		td->td_rtcgen = 0;
		return (true);
	}
	return (false);
}

static struct mtx tc_setclock_mtx;
MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN);

/*
 * Step our concept of UTC.  This is done by modifying our estimate of
 * when we booted.
 */
void
tc_setclock(struct timespec *ts)
{
	struct timespec tbef, taft;
	struct bintime bt, bt2;

	timespec2bintime(ts, &bt);
	nanotime(&tbef);
	mtx_lock_spin(&tc_setclock_mtx);
	cpu_tick_calibrate(1);
	binuptime(&bt2);
	bintime_sub(&bt, &bt2);

	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup(&bt);
	mtx_unlock_spin(&tc_setclock_mtx);

	/* Avoid rtc_generation == 0, since td_rtcgen == 0 is special. */
	atomic_add_rel_int(&rtc_generation, 2);
	sleepq_chains_remove_matching(sleeping_on_old_rtc);
	if (timestepwarnings) {
		nanotime(&taft);
		log(LOG_INFO,
		    "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n",
		    (intmax_t)tbef.tv_sec, tbef.tv_nsec,
		    (intmax_t)taft.tv_sec, taft.tv_nsec,
		    (intmax_t)ts->tv_sec, ts->tv_nsec);
	}
}

/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands.  Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP.  Slightly magic.
 */
static void
tc_windup(struct bintime *new_boottimebin)
{
	struct bintime bt;
	struct timehands *th, *tho;
	uint64_t scale;
	u_int delta, ncount, ogen;
	int i;
	time_t t;

	/*
	 * Make the next timehands a copy of the current one, but do
	 * not overwrite the generation or next pointer.  While we
	 * update the contents, the generation must be zero.  We need
	 * to ensure that the zero generation is visible before the
	 * data updates become visible, which requires release fence.
	 * For similar reasons, re-reading of the generation after the
	 * data is read should use acquire fence.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	atomic_thread_fence_rel();
	memcpy(th, tho, offsetof(struct timehands, th_generation));
	if (new_boottimebin != NULL)
		th->th_boottime = *new_boottimebin;

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
#ifdef FFCLOCK
	ffclock_windup(delta);
#endif
	th->th_offset_count += delta;
	th->th_offset_count &= th->th_counter->tc_counter_mask;
	while (delta > th->th_counter->tc_frequency) {
		/* Eat complete unadjusted seconds. */
		delta -= th->th_counter->tc_frequency;
		th->th_offset.sec++;
	}
	if ((delta > th->th_counter->tc_frequency / 2) &&
	    (th->th_scale * delta < ((uint64_t)1 << 63))) {
		/* The product th_scale * delta just barely overflows. */
		th->th_offset.sec++;
	}
	bintime_addx(&th->th_offset, th->th_scale * delta);

	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them.  There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which
	 * might have a different rate than the current NTP second.  It
	 * doesn't matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);

	/*
	 * Deal with NTP second processing.  The for loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if timeouts are not run for several seconds.
	 * At boot, the time step can be large when the TOD hardware
	 * has been read, so on really large steps, we call
	 * ntp_update_second only twice.  We need to call it twice in
	 * case we missed a leap second.
	 */
	bt = th->th_offset;
	bintime_add(&bt, &th->th_boottime);
	i = bt.sec - tho->th_microtime.tv_sec;
	if (i > LARGE_STEP)
		i = 2;
	for (; i > 0; i--) {
		t = bt.sec;
		ntp_update_second(&th->th_adjustment, &bt.sec);
		if (bt.sec != t)
			th->th_boottime.sec += bt.sec - t;
	}
	/* Update the UTC timestamps used by the get*() functions. */
	th->th_bintime = bt;
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);

	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
#ifndef __arm__
		if ((timecounter->tc_flags & TC_FLAGS_C2STOP) != 0)
			cpu_disable_c2_sleep++;
		if ((th->th_counter->tc_flags & TC_FLAGS_C2STOP) != 0)
			cpu_disable_c2_sleep--;
#endif
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
		tc_min_ticktock_freq = max(1, timecounter->tc_frequency /
		    (((uint64_t)timecounter->tc_counter_mask + 1) / 3));
#ifdef FFCLOCK
		ffclock_change_tc(th);
#endif
	}

	/*-
	 * Recalculate the scaling factor.  We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, that
	 * leaves no suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment.  On a
	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 *
	 */
	scale = (uint64_t)1 << 63;
	scale += (th->th_adjustment / 1024) * 2199;
	scale /= th->th_counter->tc_frequency;
	th->th_scale = scale * 2;
	th->th_large_delta = MIN(((uint64_t)1 << 63) / scale, UINT_MAX);

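	/*
	 * Worked example (added for illustration, not in the original): with
	 * a 1 MHz counter and zero NTP adjustment, scale = 2^63 / 10^6 and
	 * th_scale = 2 * scale ~= 2^64 / 10^6 ~= 18446744073709, i.e. each
	 * counter tick advances th_offset by one microsecond expressed as a
	 * 64-bit binary fraction of a second.  The (th_adjustment / 1024) *
	 * 2199 term, once doubled along with the rest of the scale, applies
	 * an effective factor of 2199/512 ~= 4.2949, approximating
	 * 2^32 / 10^9 = 4.294967296 to within roughly 10 PPM, as the comment
	 * above notes.
	 */
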
	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.
	 */
	if (++ogen == 0)
		ogen = 1;
	atomic_store_rel_int(&th->th_generation, ogen);

	/* Go live with the new struct timehands. */
#ifdef FFCLOCK
	switch (sysclock_active) {
	case SYSCLOCK_FBCK:
#endif
		time_second = th->th_microtime.tv_sec;
		time_uptime = th->th_offset.sec;
#ifdef FFCLOCK
		break;
	case SYSCLOCK_FFWD:
		time_second = fftimehands->tick_time_lerp.sec;
		time_uptime = fftimehands->tick_time_lerp.sec -
		    ffclock_boottime.sec;
		break;
	}
#endif

	timehands = th;
	timekeep_push_vdso();
}

/* Report or change the active timecounter hardware. */
static int
sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS)
{
	char newname[32];
	struct timecounter *newtc, *tc;
	int error;

	tc = timecounter;
	strlcpy(newname, tc->tc_name, sizeof(newname));

	error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	/* Record that the tc in use now was specifically chosen. */
	tc_chosen = 1;
	if (strcmp(newname, tc->tc_name) == 0)
		return (0);
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;

		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);

		timecounter = newtc;

		/*
		 * The vdso timehands update is deferred until the next
		 * 'tc_windup()'.
		 *
		 * This is prudent given that 'timekeep_push_vdso()' does not
		 * use any locking and that it can be called in hard interrupt
		 * context via 'tc_windup()'.
		 */
		return (0);
	}
	return (EINVAL);
}

SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW,
    0, 0, sysctl_kern_timecounter_hardware, "A",
    "Timecounter hardware selected");

/* Report the available timecounter hardware. */
static int
sysctl_kern_timecounter_choice(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sb;
	struct timecounter *tc;
	int error;

	sbuf_new_for_sysctl(&sb, NULL, 0, req);
	for (tc = timecounters; tc != NULL; tc = tc->tc_next) {
		if (tc != timecounters)
			sbuf_putc(&sb, ' ');
		sbuf_printf(&sb, "%s(%d)", tc->tc_name, tc->tc_quality);
	}
	error = sbuf_finish(&sb);
	sbuf_delete(&sb);
	return (error);
}

SYSCTL_PROC(_kern_timecounter, OID_AUTO, choice, CTLTYPE_STRING | CTLFLAG_RD,
    0, 0, sysctl_kern_timecounter_choice, "A", "Timecounter hardware detected");

/*
 * RFC 2783 PPS-API implementation.
 */

/*
 * Return true if the driver is aware of the abi version extensions in the
 * pps_state structure, and it supports at least the given abi version number.
 */
static inline int
abi_aware(struct pps_state *pps, int vers)
{

	return ((pps->kcmode & KCMODE_ABIFLAG) && pps->driver_abi >= vers);
}

static int
pps_fetch(struct pps_fetch_args *fapi, struct pps_state *pps)
{
	int err, timo;
	pps_seq_t aseq, cseq;
	struct timeval tv;

	if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
		return (EINVAL);

	/*
	 * If no timeout is requested, immediately return whatever values were
	 * most recently captured.  If timeout seconds is -1, that's a request
	 * to block without a timeout.  WITNESS won't let us sleep forever
	 * without a lock (we really don't need a lock), so just repeatedly
	 * sleep a long time.
	 */
	if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) {
		if (fapi->timeout.tv_sec == -1)
			timo = 0x7fffffff;
		else {
			tv.tv_sec = fapi->timeout.tv_sec;
			tv.tv_usec = fapi->timeout.tv_nsec / 1000;
			timo = tvtohz(&tv);
		}
		aseq = atomic_load_int(&pps->ppsinfo.assert_sequence);
		cseq = atomic_load_int(&pps->ppsinfo.clear_sequence);
		while (aseq == atomic_load_int(&pps->ppsinfo.assert_sequence) &&
		    cseq == atomic_load_int(&pps->ppsinfo.clear_sequence)) {
			if (abi_aware(pps, 1) && pps->driver_mtx != NULL) {
				if (pps->flags & PPSFLAG_MTX_SPIN) {
					err = msleep_spin(pps, pps->driver_mtx,
					    "ppsfch", timo);
				} else {
					err = msleep(pps, pps->driver_mtx, PCATCH,
					    "ppsfch", timo);
				}
			} else {
				err = tsleep(pps, PCATCH, "ppsfch", timo);
			}
			if (err == EWOULDBLOCK) {
				if (fapi->timeout.tv_sec == -1) {
					continue;
				} else {
					return (ETIMEDOUT);
				}
			} else if (err != 0) {
				return (err);
			}
		}
	}

	pps->ppsinfo.current_mode = pps->ppsparam.mode;
	fapi->pps_info_buf = pps->ppsinfo;

	return (0);
}

int
pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
{
	pps_params_t *app;
	struct pps_fetch_args *fapi;
#ifdef FFCLOCK
	struct pps_fetch_ffc_args *fapi_ffc;
#endif
#ifdef PPS_SYNC
	struct pps_kcbind_args *kapi;
#endif

	KASSERT(pps != NULL, ("NULL pps pointer in pps_ioctl"));
	switch (cmd) {
	case PPS_IOC_CREATE:
		return (0);
	case PPS_IOC_DESTROY:
		return (0);
	case PPS_IOC_SETPARAMS:
		app = (pps_params_t *)data;
		if (app->mode & ~pps->ppscap)
			return (EINVAL);
#ifdef FFCLOCK
		/* Ensure only a single clock is selected for ffc timestamp. */
		if ((app->mode & PPS_TSCLK_MASK) == PPS_TSCLK_MASK)
			return (EINVAL);
#endif
		pps->ppsparam = *app;
		return (0);
	case PPS_IOC_GETPARAMS:
		app = (pps_params_t *)data;
		*app = pps->ppsparam;
		app->api_version = PPS_API_VERS_1;
		return (0);
	case PPS_IOC_GETCAP:
		*(int*)data = pps->ppscap;
		return (0);
	case PPS_IOC_FETCH:
		fapi = (struct pps_fetch_args *)data;
		return (pps_fetch(fapi, pps));
#ifdef FFCLOCK
	case PPS_IOC_FETCH_FFCOUNTER:
		fapi_ffc = (struct pps_fetch_ffc_args *)data;
		if (fapi_ffc->tsformat && fapi_ffc->tsformat !=
		    PPS_TSFMT_TSPEC)
			return (EINVAL);
		if (fapi_ffc->timeout.tv_sec || fapi_ffc->timeout.tv_nsec)
			return (EOPNOTSUPP);
		pps->ppsinfo_ffc.current_mode = pps->ppsparam.mode;
		fapi_ffc->pps_info_buf_ffc = pps->ppsinfo_ffc;
		/* Overwrite timestamps if feedback clock selected. */
		switch (pps->ppsparam.mode & PPS_TSCLK_MASK) {
		case PPS_TSCLK_FBCK:
			fapi_ffc->pps_info_buf_ffc.assert_timestamp =
			    pps->ppsinfo.assert_timestamp;
			fapi_ffc->pps_info_buf_ffc.clear_timestamp =
			    pps->ppsinfo.clear_timestamp;
			break;
		case PPS_TSCLK_FFWD:
			break;
		default:
			break;
		}
		return (0);
#endif /* FFCLOCK */
	case PPS_IOC_KCBIND:
#ifdef PPS_SYNC
		kapi = (struct pps_kcbind_args *)data;
		/* XXX Only root should be able to do this */
		if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
			return (EINVAL);
		if (kapi->kernel_consumer != PPS_KC_HARDPPS)
			return (EINVAL);
		if (kapi->edge & ~pps->ppscap)
			return (EINVAL);
		pps->kcmode = (kapi->edge & KCMODE_EDGEMASK) |
		    (pps->kcmode & KCMODE_ABIFLAG);
		return (0);
#else
		return (EOPNOTSUPP);
#endif
	default:
		return (ENOIOCTL);
	}
}

void
pps_init(struct pps_state *pps)
{
	pps->ppscap |= PPS_TSFMT_TSPEC | PPS_CANWAIT;
	if (pps->ppscap & PPS_CAPTUREASSERT)
		pps->ppscap |= PPS_OFFSETASSERT;
	if (pps->ppscap & PPS_CAPTURECLEAR)
		pps->ppscap |= PPS_OFFSETCLEAR;
#ifdef FFCLOCK
	pps->ppscap |= PPS_TSCLK_MASK;
#endif
	pps->kcmode &= ~KCMODE_ABIFLAG;
}

void
pps_init_abi(struct pps_state *pps)
{

	pps_init(pps);
	if (pps->driver_abi > 0) {
		pps->kcmode |= KCMODE_ABIFLAG;
		pps->kernel_abi = PPS_ABI_VERSION;
	}
}

void
pps_capture(struct pps_state *pps)
{
	struct timehands *th;

	KASSERT(pps != NULL, ("NULL pps pointer in pps_capture"));
	th = timehands;
	pps->capgen = atomic_load_acq_int(&th->th_generation);
	pps->capth = th;
#ifdef FFCLOCK
	pps->capffth = fftimehands;
#endif
	pps->capcount = th->th_counter->tc_get_timecount(th->th_counter);
	atomic_thread_fence_acq();
	if (pps->capgen != th->th_generation)
		pps->capgen = 0;
}

void
pps_event(struct pps_state *pps, int event)
{
	struct bintime bt;
	struct timespec ts, *tsp, *osp;
	u_int tcount, *pcount;
	int foff;
	pps_seq_t *pseq;
#ifdef FFCLOCK
	struct timespec *tsp_ffc;
	pps_seq_t *pseq_ffc;
	ffcounter *ffcount;
#endif
#ifdef PPS_SYNC
	int fhard;
#endif

	KASSERT(pps != NULL, ("NULL pps pointer in pps_event"));
	/* Nothing to do if not currently set to capture this event type. */
	if ((event & pps->ppsparam.mode) == 0)
		return;
	/* If the timecounter was wound up underneath us, bail out. */
	if (pps->capgen == 0 || pps->capgen !=
	    atomic_load_acq_int(&pps->capth->th_generation))
		return;

	/* Things would be easier with arrays. */
	if (event == PPS_CAPTUREASSERT) {
		tsp = &pps->ppsinfo.assert_timestamp;
		osp = &pps->ppsparam.assert_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
#ifdef PPS_SYNC
		fhard = pps->kcmode & PPS_CAPTUREASSERT;
#endif
		pcount = &pps->ppscount[0];
		pseq = &pps->ppsinfo.assert_sequence;
#ifdef FFCLOCK
		ffcount = &pps->ppsinfo_ffc.assert_ffcount;
		tsp_ffc = &pps->ppsinfo_ffc.assert_timestamp;
		pseq_ffc = &pps->ppsinfo_ffc.assert_sequence;
#endif
	} else {
		tsp = &pps->ppsinfo.clear_timestamp;
		osp = &pps->ppsparam.clear_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
#ifdef PPS_SYNC
		fhard = pps->kcmode & PPS_CAPTURECLEAR;
#endif
		pcount = &pps->ppscount[1];
		pseq = &pps->ppsinfo.clear_sequence;
#ifdef FFCLOCK
		ffcount = &pps->ppsinfo_ffc.clear_ffcount;
		tsp_ffc = &pps->ppsinfo_ffc.clear_timestamp;
		pseq_ffc = &pps->ppsinfo_ffc.clear_sequence;
#endif
	}

	/*
	 * If the timecounter changed, we cannot compare the count values, so
	 * we have to drop the rest of the PPS-stuff until the next event.
	 */
	if (pps->ppstc != pps->capth->th_counter) {
		pps->ppstc = pps->capth->th_counter;
		*pcount = pps->capcount;
		pps->ppscount[2] = pps->capcount;
		return;
	}

	/* Convert the count to a timespec. */
	tcount = pps->capcount - pps->capth->th_offset_count;
	tcount &= pps->capth->th_counter->tc_counter_mask;
	bt = pps->capth->th_bintime;
	bintime_addx(&bt, pps->capth->th_scale * tcount);
	bintime2timespec(&bt, &ts);

	/* If the timecounter was wound up underneath us, bail out. */
	atomic_thread_fence_acq();
	if (pps->capgen != pps->capth->th_generation)
		return;

	*pcount = pps->capcount;
	(*pseq)++;
	*tsp = ts;

	if (foff) {
		timespecadd(tsp, osp, tsp);
		if (tsp->tv_nsec < 0) {
			tsp->tv_nsec += 1000000000;
			tsp->tv_sec -= 1;
		}
	}

#ifdef FFCLOCK
	*ffcount = pps->capffth->tick_ffcount + tcount;
	bt = pps->capffth->tick_time;
	ffclock_convert_delta(tcount, pps->capffth->cest.period, &bt);
	bintime_add(&bt, &pps->capffth->tick_time);
	bintime2timespec(&bt, &ts);
	(*pseq_ffc)++;
	*tsp_ffc = ts;
#endif

#ifdef PPS_SYNC
	if (fhard) {
		uint64_t scale;

		/*
		 * Feed the NTP PLL/FLL.
		 * The FLL wants to know how many (hardware) nanoseconds
		 * elapsed since the previous event.
		 */
		tcount = pps->capcount - pps->ppscount[2];
		pps->ppscount[2] = pps->capcount;
		tcount &= pps->capth->th_counter->tc_counter_mask;
		scale = (uint64_t)1 << 63;
		scale /= pps->capth->th_counter->tc_frequency;
		scale *= 2;
		bt.sec = 0;
		bt.frac = 0;
		bintime_addx(&bt, scale * tcount);
		bintime2timespec(&bt, &ts);
		hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
	}
#endif

	/* Wakeup anyone sleeping in pps_fetch(). */
	wakeup(pps);
}

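/*
 * Illustrative sketch (added, not part of the original file): the expected
 * calling pattern for a PPS-capable driver.  pps_capture() is cheap and is
 * meant to run as early as possible in the interrupt handler, while
 * pps_event() does the heavier conversion and may be deferred.  The foo_*
 * names are hypothetical.
 */
#if 0
static void
foo_pps_intr(struct foo_softc *sc)
{

	pps_capture(&sc->sc_pps);
	/* ... acknowledge the interrupt in hardware ... */
	pps_event(&sc->sc_pps, PPS_CAPTUREASSERT);
}
#endif
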
/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing.  Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 */

static int tc_tick;
SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tc_tick, 0,
    "Approximate number of hardclock ticks in a millisecond");

void
tc_ticktock(int cnt)
{
	static int count;

	if (mtx_trylock_spin(&tc_setclock_mtx)) {
		count += cnt;
		if (count >= tc_tick) {
			count = 0;
			tc_windup(NULL);
		}
		mtx_unlock_spin(&tc_setclock_mtx);
	}
}

static void __inline
tc_adjprecision(void)
{
	int t;

	if (tc_timepercentage > 0) {
		t = (99 + tc_timepercentage) / tc_timepercentage;
		tc_precexp = fls(t + (t >> 1)) - 1;
		FREQ2BT(hz / tc_tick, &bt_timethreshold);
		FREQ2BT(hz, &bt_tickthreshold);
		bintime_shift(&bt_timethreshold, tc_precexp);
		bintime_shift(&bt_tickthreshold, tc_precexp);
	} else {
		tc_precexp = 31;
		bt_timethreshold.sec = INT_MAX;
		bt_timethreshold.frac = ~(uint64_t)0;
		bt_tickthreshold = bt_timethreshold;
	}
	sbt_timethreshold = bttosbt(bt_timethreshold);
	sbt_tickthreshold = bttosbt(bt_tickthreshold);
}

static int
sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS)
{
	int error, val;

	val = tc_timepercentage;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	tc_timepercentage = val;
	if (cold)
		goto done;
	tc_adjprecision();
done:
	return (0);
}

/* Set up the requested number of timehands. */
static void
inittimehands(void *dummy)
{
	struct timehands *thp;
	int i;

	TUNABLE_INT_FETCH("kern.timecounter.timehands_count",
	    &timehands_count);
	if (timehands_count < 1)
		timehands_count = 1;
	if (timehands_count > nitems(ths))
		timehands_count = nitems(ths);
	for (i = 1, thp = &ths[0]; i < timehands_count; thp = &ths[i++])
		thp->th_next = &ths[i];
	thp->th_next = &ths[0];
}
SYSINIT(timehands, SI_SUB_TUNABLES, SI_ORDER_ANY, inittimehands, NULL);

static void
inittimecounter(void *dummy)
{
	u_int p;
	int tick_rate;

	/*
	 * Set the initial timeout to
	 * max(1, <approx. number of hardclock ticks in a millisecond>).
	 * People should probably not use the sysctl to set the timeout
	 * to smaller than its initial value, since that value is the
	 * smallest reasonable one.  If they want better timestamps they
	 * should use the non-"get"* functions.
	 */
	if (hz > 1000)
		tc_tick = (hz + 500) / 1000;
	else
		tc_tick = 1;
	tc_adjprecision();
	FREQ2BT(hz, &tick_bt);
	tick_sbt = bttosbt(tick_bt);
	tick_rate = hz / tc_tick;
	FREQ2BT(tick_rate, &tc_tick_bt);
	tc_tick_sbt = bttosbt(tc_tick_bt);
	p = (tc_tick * 1000000) / hz;
	printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);

#ifdef FFCLOCK
	ffclock_init();
#endif

	/* warm up new timecounter (again) and get rolling. */
	(void)timecounter->tc_get_timecount(timecounter);
	(void)timecounter->tc_get_timecount(timecounter);
	mtx_lock_spin(&tc_setclock_mtx);
	tc_windup(NULL);
	mtx_unlock_spin(&tc_setclock_mtx);
}

SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL);

/* Cpu tick handling -------------------------------------------------*/

static int cpu_tick_variable;
static uint64_t	cpu_tick_frequency;

DPCPU_DEFINE_STATIC(uint64_t, tc_cpu_ticks_base);
DPCPU_DEFINE_STATIC(unsigned, tc_cpu_ticks_last);

static uint64_t
tc_cpu_ticks(void)
{
	struct timecounter *tc;
	uint64_t res, *base;
	unsigned u, *last;

	critical_enter();
	base = DPCPU_PTR(tc_cpu_ticks_base);
	last = DPCPU_PTR(tc_cpu_ticks_last);
	tc = timehands->th_counter;
	u = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
	if (u < *last)
		*base += (uint64_t)tc->tc_counter_mask + 1;
	*last = u;
	res = u + *base;
	critical_exit();
	return (res);
}

void
cpu_tick_calibration(void)
{
	static time_t last_calib;

	if (time_uptime != last_calib && !(time_uptime & 0xf)) {
		cpu_tick_calibrate(0);
		last_calib = time_uptime;
	}
}

/*
 * This function gets called every 16 seconds on only one designated
 * CPU in the system from hardclock() via cpu_tick_calibration().
 *
 * Whenever the real time clock is stepped we get called with reset=1
 * to make sure we handle suspend/resume and similar events correctly.
 */

static void
cpu_tick_calibrate(int reset)
{
	static uint64_t c_last;
	uint64_t c_this, c_delta;
	static struct bintime t_last;
	struct bintime t_this, t_delta;
	uint32_t divi;

	if (reset) {
		/* The clock was stepped, abort & reset */
		t_last.sec = 0;
		return;
	}

	/* we don't calibrate fixed rate cputicks */
	if (!cpu_tick_variable)
		return;

	getbinuptime(&t_this);
	c_this = cpu_ticks();
	if (t_last.sec != 0) {
		c_delta = c_this - c_last;
		t_delta = t_this;
		bintime_sub(&t_delta, &t_last);
		/*
		 * Headroom:
		 * 	2^(64-20) / 16[s] =
		 * 	2^(44) / 16[s] =
		 * 	17.592.186.044.416 / 16 =
		 * 	1.099.511.627.776 [Hz]
		 */
		divi = t_delta.sec << 20;
		divi |= t_delta.frac >> (64 - 20);
		c_delta <<= 20;
		c_delta /= divi;
		if (c_delta > cpu_tick_frequency) {
			if (0 && bootverbose)
				printf("cpu_tick increased to %ju Hz\n",
				    c_delta);
			cpu_tick_frequency = c_delta;
		}
	}
	c_last = c_this;
	t_last = t_this;
}

void
set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var)
{

	if (func == NULL) {
		cpu_ticks = tc_cpu_ticks;
	} else {
		cpu_tick_frequency = freq;
		cpu_tick_variable = var;
		cpu_ticks = func;
	}
}

uint64_t
cpu_tickrate(void)
{

	if (cpu_ticks == tc_cpu_ticks)
		return (tc_getfrequency());
	return (cpu_tick_frequency);
}

/*
 * We need to be slightly careful converting cputicks to microseconds.
 * There is plenty of margin in 64 bits of microseconds (half a million
 * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply
 * before divide conversion (to retain precision) we find that the
 * margin shrinks to 1.5 hours (one millionth of 146y).
 * With a three prong approach we never lose significant bits, no
 * matter what the cputick rate and length of timeinterval is.
 */

uint64_t
cputick2usec(uint64_t tick)
{

	if (tick > 18446744073709551LL)		/* floor(2^64 / 1000) */
		return (tick / (cpu_tickrate() / 1000000LL));
	else if (tick > 18446744073709LL)	/* floor(2^64 / 1000000) */
		return ((tick * 1000LL) / (cpu_tickrate() / 1000LL));
	else
		return ((tick * 1000000LL) / cpu_tickrate());
}

cpu_tick_f	*cpu_ticks = tc_cpu_ticks;

static int vdso_th_enable = 1;
static int
sysctl_fast_gettime(SYSCTL_HANDLER_ARGS)
{
	int old_vdso_th_enable, error;

	old_vdso_th_enable = vdso_th_enable;
	error = sysctl_handle_int(oidp, &old_vdso_th_enable, 0, req);
	if (error != 0)
		return (error);
	vdso_th_enable = old_vdso_th_enable;
	return (0);
}
SYSCTL_PROC(_kern_timecounter, OID_AUTO, fast_gettime,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_fast_gettime, "I", "Enable fast time of day");

uint32_t
tc_fill_vdso_timehands(struct vdso_timehands *vdso_th)
{
	struct timehands *th;
	uint32_t enabled;

	th = timehands;
	vdso_th->th_scale = th->th_scale;
	vdso_th->th_offset_count = th->th_offset_count;
	vdso_th->th_counter_mask = th->th_counter->tc_counter_mask;
	vdso_th->th_offset = th->th_offset;
	vdso_th->th_boottime = th->th_boottime;
	if (th->th_counter->tc_fill_vdso_timehands != NULL) {
		enabled = th->th_counter->tc_fill_vdso_timehands(vdso_th,
		    th->th_counter);
	} else
		enabled = 0;
	if (!vdso_th_enable)
		enabled = 0;
	return (enabled);
}

#ifdef COMPAT_FREEBSD32
uint32_t
tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32)
{
	struct timehands *th;
	uint32_t enabled;

	th = timehands;
	*(uint64_t *)&vdso_th32->th_scale[0] = th->th_scale;
	vdso_th32->th_offset_count = th->th_offset_count;
	vdso_th32->th_counter_mask = th->th_counter->tc_counter_mask;
	vdso_th32->th_offset.sec = th->th_offset.sec;
	*(uint64_t *)&vdso_th32->th_offset.frac[0] = th->th_offset.frac;
	vdso_th32->th_boottime.sec = th->th_boottime.sec;
	*(uint64_t *)&vdso_th32->th_boottime.frac[0] = th->th_boottime.frac;
	if (th->th_counter->tc_fill_vdso_timehands32 != NULL) {
		enabled = th->th_counter->tc_fill_vdso_timehands32(vdso_th32,
		    th->th_counter);
	} else
		enabled = 0;
	if (!vdso_th_enable)
		enabled = 0;
	return (enabled);
}
#endif