/*
 * linux/kernel/time/clocksource.c
 *
 * This file contains the functions which manage clocksource drivers.
 *
 * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * TODO WishList:
 *   o Allow clocksource drivers to be unregistered
 */

#include <linux/device.h>
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>

#include "tick-internal.h"

void timecounter_init(struct timecounter *tc,
		      const struct cyclecounter *cc,
		      u64 start_tstamp)
{
	tc->cc = cc;
	tc->cycle_last = cc->read(cc);
	tc->nsec = start_tstamp;
}
EXPORT_SYMBOL_GPL(timecounter_init);

/**
 * timecounter_read_delta - get nanoseconds since last call of this function
 * @tc:		Pointer to time counter
 *
 * When the underlying cycle counter runs over, this will be handled
 * correctly as long as it does not run over more than once between
 * calls.
 *
 * The first call to this function for a new time counter initializes
 * the time tracking and returns an undefined result.
 */
static u64 timecounter_read_delta(struct timecounter *tc)
{
	cycle_t cycle_now, cycle_delta;
	u64 ns_offset;

	/* read cycle counter: */
	cycle_now = tc->cc->read(tc->cc);

	/* calculate the delta since the last timecounter_read_delta(): */
	cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask;

	/* convert to nanoseconds: */
	ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta);

	/* update time stamp of timecounter_read_delta() call: */
	tc->cycle_last = cycle_now;

	return ns_offset;
}

u64 timecounter_read(struct timecounter *tc)
{
	u64 nsec;

	/* increment time by nanoseconds since last call */
	nsec = timecounter_read_delta(tc);
	nsec += tc->nsec;
	tc->nsec = nsec;

	return nsec;
}
EXPORT_SYMBOL_GPL(timecounter_read);
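/*
 * Illustrative sketch: how a driver might feed a free-running hardware
 * counter into the timecounter helpers above. The register, the 10 MHz
 * frequency and all "foo_" names are hypothetical.
 *
 *	static cycle_t foo_cc_read(const struct cyclecounter *cc)
 *	{
 *		return (cycle_t)readl(foo_counter_base);
 *	}
 *
 *	static struct cyclecounter foo_cc = {
 *		.read	= foo_cc_read,
 *		.mask	= CLOCKSOURCE_MASK(32),
 *		.mult	= 100,	(10 MHz -> 100 ns per cycle)
 *		.shift	= 0,
 *	};
 *	static struct timecounter foo_tc;
 *
 *	timecounter_init(&foo_tc, &foo_cc, ktime_to_ns(ktime_get_real()));
 *	...
 *	ns = timecounter_read(&foo_tc);
 */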
u64 timecounter_cyc2time(struct timecounter *tc,
			 cycle_t cycle_tstamp)
{
	u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask;
	u64 nsec;

	/*
	 * Instead of always treating cycle_tstamp as more recent
	 * than tc->cycle_last, detect when it is too far in the
	 * future and treat it as old time stamp instead.
	 */
	if (cycle_delta > tc->cc->mask / 2) {
		cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask;
		nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta);
	} else {
		nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec;
	}

	return nsec;
}
EXPORT_SYMBOL_GPL(timecounter_cyc2time);

/**
 * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
 * @mult:	pointer to mult variable
 * @shift:	pointer to shift variable
 * @from:	frequency to convert from
 * @to:		frequency to convert to
 * @maxsec:	guaranteed runtime conversion range in seconds
 *
 * The function evaluates the shift/mult pair for the scaled math
 * operations of clocksources and clockevents.
 *
 * @to and @from are frequency values in HZ. For clock sources @to is
 * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
 * event @to is the counter frequency and @from is NSEC_PER_SEC.
 *
 * The @maxsec conversion range argument controls the time frame in
 * seconds which must be covered by the runtime conversion with the
 * calculated mult and shift factors. This guarantees that no 64bit
 * overflow happens when the input value of the conversion is
 * multiplied with the calculated mult factor. Larger ranges may
 * reduce the conversion accuracy by choosing smaller mult and shift
 * factors.
 */
void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
{
	u64 tmp;
	u32 sft, sftacc = 32;

	/*
	 * Calculate the shift factor which is limiting the conversion
	 * range:
	 */
	tmp = ((u64)maxsec * from) >> 32;
	while (tmp) {
		tmp >>= 1;
		sftacc--;
	}

	/*
	 * Find the conversion shift/mult pair which has the best
	 * accuracy and fits the maxsec conversion range:
	 */
	for (sft = 32; sft > 0; sft--) {
		tmp = (u64) to << sft;
		tmp += from / 2;
		do_div(tmp, from);
		if ((tmp >> sftacc) == 0)
			break;
	}
	*mult = tmp;
	*shift = sft;
}
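/*
 * Worked example (illustrative): for a 10 MHz clocksource asked to cover
 * at least 600 seconds,
 *
 *	clocks_calc_mult_shift(&mult, &shift, 10000000, NSEC_PER_SEC, 600);
 *
 * yields mult = 1677721600 (100 << 24) and shift = 24, so the runtime
 * conversion (cycles * mult) >> shift equals cycles * 100, i.e. 100 ns
 * per cycle, without overflowing 64 bits within the requested range.
 */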
/*[Clocksource internal variables]---------
 * curr_clocksource:
 *	currently selected clocksource.
 * clocksource_list:
 *	linked list with the registered clocksources
 * clocksource_mutex:
 *	protects manipulations to curr_clocksource and the clocksource_list
 * override_name:
 *	Name of the user-specified clocksource.
 */
static struct clocksource *curr_clocksource;
static LIST_HEAD(clocksource_list);
static DEFINE_MUTEX(clocksource_mutex);
static char override_name[CS_NAME_LEN];
static int finished_booting;

#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
static void clocksource_select(void);

static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
static struct timer_list watchdog_timer;
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;

static int clocksource_watchdog_kthread(void *data);
static void __clocksource_change_rating(struct clocksource *cs, int rating);

/*
 * Interval: 0.5sec Threshold: 0.0625s
 */
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)

static void clocksource_watchdog_work(struct work_struct *work)
{
	/*
	 * If kthread_run fails the next watchdog scan over the
	 * watchdog_list will find the unstable clock again.
	 */
	kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
}

static void __clocksource_unstable(struct clocksource *cs)
{
	cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
	cs->flags |= CLOCK_SOURCE_UNSTABLE;
	if (finished_booting)
		schedule_work(&watchdog_work);
}

static void clocksource_unstable(struct clocksource *cs, int64_t delta)
{
	printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
	       cs->name, delta);
	__clocksource_unstable(cs);
}

/**
 * clocksource_mark_unstable - mark clocksource unstable via watchdog
 * @cs:		clocksource to be marked unstable
 *
 * This function is called instead of clocksource_change_rating from
 * cpu hotplug code to avoid a deadlock between the clocksource mutex
 * and the cpu hotplug mutex. It defers the update of the clocksource
 * to the watchdog thread.
 */
void clocksource_mark_unstable(struct clocksource *cs)
{
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
		if (list_empty(&cs->wd_list))
			list_add(&cs->wd_list, &watchdog_list);
		__clocksource_unstable(cs);
	}
	spin_unlock_irqrestore(&watchdog_lock, flags);
}
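/*
 * Worked numbers for the check done by clocksource_watchdog() below,
 * assuming the default interval and threshold above: every 0.5 seconds
 * the watched clocksource and the watchdog are read back to back, both
 * deltas are converted to nanoseconds, and a difference of more than
 * 62500000 ns (0.0625 s) marks the watched clocksource unstable.
 */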
static void clocksource_watchdog(unsigned long data)
{
	struct clocksource *cs;
	cycle_t csnow, wdnow;
	int64_t wd_nsec, cs_nsec;
	int next_cpu, reset_pending;

	spin_lock(&watchdog_lock);
	if (!watchdog_running)
		goto out;

	reset_pending = atomic_read(&watchdog_reset_pending);

	list_for_each_entry(cs, &watchdog_list, wd_list) {

		/* Clocksource already marked unstable? */
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			if (finished_booting)
				schedule_work(&watchdog_work);
			continue;
		}

		local_irq_disable();
		csnow = cs->read(cs);
		wdnow = watchdog->read(watchdog);
		local_irq_enable();

		/* Clocksource initialized ? */
		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
		    atomic_read(&watchdog_reset_pending)) {
			cs->flags |= CLOCK_SOURCE_WATCHDOG;
			cs->wd_last = wdnow;
			cs->cs_last = csnow;
			continue;
		}

		wd_nsec = clocksource_cyc2ns((wdnow - cs->wd_last) & watchdog->mask,
					     watchdog->mult, watchdog->shift);

		cs_nsec = clocksource_cyc2ns((csnow - cs->cs_last) &
					     cs->mask, cs->mult, cs->shift);
		cs->cs_last = csnow;
		cs->wd_last = wdnow;

		if (atomic_read(&watchdog_reset_pending))
			continue;

		/* Check the deviation from the watchdog clocksource. */
		if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
			clocksource_unstable(cs, cs_nsec - wd_nsec);
			continue;
		}

		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
			/* Mark it valid for high-res. */
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;

			/*
			 * clocksource_done_booting() will sort it if
			 * finished_booting is not set yet.
			 */
			if (!finished_booting)
				continue;

			/*
			 * If this is not the current clocksource let
			 * the watchdog thread reselect it. Due to the
			 * change to high res this clocksource might
			 * be preferred now. If it is the current
			 * clocksource let the tick code know about
			 * that change.
			 */
			if (cs != curr_clocksource) {
				cs->flags |= CLOCK_SOURCE_RESELECT;
				schedule_work(&watchdog_work);
			} else {
				tick_clock_notify();
			}
		}
	}

	/*
	 * We only clear watchdog_reset_pending when we did a full cycle
	 * through all clocksources.
	 */
	if (reset_pending)
		atomic_dec(&watchdog_reset_pending);

	/*
	 * Cycle through CPUs to check if the CPUs stay synchronized
	 * to each other.
	 */
	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
	if (next_cpu >= nr_cpu_ids)
		next_cpu = cpumask_first(cpu_online_mask);
	watchdog_timer.expires += WATCHDOG_INTERVAL;
	add_timer_on(&watchdog_timer, next_cpu);
out:
	spin_unlock(&watchdog_lock);
}

static inline void clocksource_start_watchdog(void)
{
	if (watchdog_running || !watchdog || list_empty(&watchdog_list))
		return;
	init_timer(&watchdog_timer);
	watchdog_timer.function = clocksource_watchdog;
	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
	watchdog_running = 1;
}

static inline void clocksource_stop_watchdog(void)
{
	if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
		return;
	del_timer(&watchdog_timer);
	watchdog_running = 0;
}

static inline void clocksource_reset_watchdog(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &watchdog_list, wd_list)
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
}

static void clocksource_resume_watchdog(void)
{
	atomic_inc(&watchdog_reset_pending);
}
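/*
 * Illustrative sketch of how clocksources end up on either side of the
 * enqueue below: a clocksource that wants to be cross-checked sets
 * CLOCK_SOURCE_MUST_VERIFY in its flags (the x86 TSC does this), while
 * everything else becomes a candidate for the watchdog role, picked by
 * rating. The "foo_" name is hypothetical.
 *
 *	static struct clocksource foo_cs = {
 *		...
 *		.flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_MUST_VERIFY,
 *	};
 */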
static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
		/* cs is a clocksource to be watched. */
		list_add(&cs->wd_list, &watchdog_list);
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
	} else {
		/* cs is a watchdog. */
		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
		/* Pick the best watchdog. */
		if (!watchdog || cs->rating > watchdog->rating) {
			watchdog = cs;
			/* Reset watchdog cycles */
			clocksource_reset_watchdog();
		}
	}
	/* Check if the watchdog timer needs to be started. */
	clocksource_start_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);
}

static void clocksource_dequeue_watchdog(struct clocksource *cs)
{
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	if (cs != watchdog) {
		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
			/* cs is a watched clocksource. */
			list_del_init(&cs->wd_list);
			/* Check if the watchdog timer needs to be stopped. */
			clocksource_stop_watchdog();
		}
	}
	spin_unlock_irqrestore(&watchdog_lock, flags);
}

static int __clocksource_watchdog_kthread(void)
{
	struct clocksource *cs, *tmp;
	unsigned long flags;
	LIST_HEAD(unstable);
	int select = 0;

	spin_lock_irqsave(&watchdog_lock, flags);
	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			list_del_init(&cs->wd_list);
			list_add(&cs->wd_list, &unstable);
			select = 1;
		}
		if (cs->flags & CLOCK_SOURCE_RESELECT) {
			cs->flags &= ~CLOCK_SOURCE_RESELECT;
			select = 1;
		}
	}
	/* Check if the watchdog timer needs to be stopped. */
	clocksource_stop_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);

	/* Needs to be done outside of watchdog lock */
	list_for_each_entry_safe(cs, tmp, &unstable, wd_list) {
		list_del_init(&cs->wd_list);
		__clocksource_change_rating(cs, 0);
	}
	return select;
}

static int clocksource_watchdog_kthread(void *data)
{
	mutex_lock(&clocksource_mutex);
	if (__clocksource_watchdog_kthread())
		clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}

static bool clocksource_is_watchdog(struct clocksource *cs)
{
	return cs == watchdog;
}

#else /* CONFIG_CLOCKSOURCE_WATCHDOG */

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
		cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}

static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
static inline int __clocksource_watchdog_kthread(void) { return 0; }
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }

#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */

/**
 * clocksource_suspend - suspend the clocksource(s)
 */
void clocksource_suspend(void)
{
	struct clocksource *cs;

	list_for_each_entry_reverse(cs, &clocksource_list, list)
		if (cs->suspend)
			cs->suspend(cs);
}

/**
 * clocksource_resume - resume the clocksource(s)
 */
void clocksource_resume(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &clocksource_list, list)
		if (cs->resume)
			cs->resume(cs);

	clocksource_resume_watchdog();
}
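/*
 * Illustrative sketch: a clocksource whose counter loses state across
 * suspend can supply ->suspend()/->resume() callbacks, which the two
 * helpers above invoke for every registered clocksource. The "foo_"
 * names are hypothetical.
 *
 *	static void foo_cs_suspend(struct clocksource *cs)
 *	{
 *		(stop or save the counter)
 *	}
 *
 *	static void foo_cs_resume(struct clocksource *cs)
 *	{
 *		(restart the counter)
 *	}
 *
 *	static struct clocksource foo_cs = {
 *		...
 *		.suspend = foo_cs_suspend,
 *		.resume	 = foo_cs_resume,
 *	};
 */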
/**
 * clocksource_touch_watchdog - Update watchdog
 *
 * Update the watchdog after exception contexts such as kgdb so as not
 * to incorrectly trip the watchdog. This might fail when the kernel
 * was stopped in code which holds watchdog_lock.
 */
void clocksource_touch_watchdog(void)
{
	clocksource_resume_watchdog();
}

/**
 * clocksource_max_adjustment - Returns max adjustment amount
 * @cs:		Pointer to clocksource
 *
 */
static u32 clocksource_max_adjustment(struct clocksource *cs)
{
	u64 ret;
	/*
	 * We won't try to correct for more than 11% adjustments (110,000 ppm),
	 */
	ret = (u64)cs->mult * 11;
	do_div(ret, 100);
	return (u32)ret;
}

/**
 * clocksource_max_deferment - Returns max time the clocksource can be deferred
 * @cs:		Pointer to clocksource
 *
 */
static u64 clocksource_max_deferment(struct clocksource *cs)
{
	u64 max_nsecs, max_cycles;

	/*
	 * Calculate the maximum number of cycles that we can pass to the
	 * cyc2ns function without overflowing a 64-bit signed result. The
	 * maximum number of cycles is equal to ULLONG_MAX/(cs->mult+cs->maxadj)
	 * which is equivalent to the below.
	 * max_cycles < (2^63)/(cs->mult + cs->maxadj)
	 * max_cycles < 2^(log2((2^63)/(cs->mult + cs->maxadj)))
	 * max_cycles < 2^(log2(2^63) - log2(cs->mult + cs->maxadj))
	 * max_cycles < 2^(63 - log2(cs->mult + cs->maxadj))
	 * max_cycles < 1 << (63 - log2(cs->mult + cs->maxadj))
	 * Please note that we add 1 to the result of the log2 to account for
	 * any rounding errors, ensure the above inequality is satisfied and
	 * no overflow will occur.
	 */
	max_cycles = 1ULL << (63 - (ilog2(cs->mult + cs->maxadj) + 1));

	/*
	 * The actual maximum number of cycles we can defer the clocksource is
	 * determined by the minimum of max_cycles and cs->mask.
	 * Note: Here we subtract the maxadj to make sure we don't sleep for
	 * too long if there's a large negative adjustment.
	 */
	max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult - cs->maxadj,
				       cs->shift);

	/*
	 * To ensure that the clocksource does not wrap whilst we are idle,
	 * limit the time the clocksource can be deferred by 12.5%. Please
	 * note a margin of 12.5% is used because this can be computed with
	 * a shift, versus say 10% which would require division.
	 */
	return max_nsecs - (max_nsecs >> 3);
}
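/*
 * Rough worked example for the math above (illustrative only): a 32-bit
 * clocksource with mult = 1677721600 and shift = 24 (100 ns per cycle)
 * gets maxadj = mult * 11 / 100 = 184549376. ilog2(mult + maxadj) is 30,
 * so max_cycles = 1ULL << (63 - 31) = 2^32, which the mask then caps at
 * 2^32 - 1. Converting with (mult - maxadj) yields roughly 382 seconds,
 * and the 12.5% margin brings max_idle_ns down to roughly 334 seconds.
 */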
#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET

static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
{
	struct clocksource *cs;

	if (!finished_booting || list_empty(&clocksource_list))
		return NULL;

	/*
	 * We pick the clocksource with the highest rating. If oneshot
	 * mode is active, we pick the highres valid clocksource with
	 * the best rating.
	 */
	list_for_each_entry(cs, &clocksource_list, list) {
		if (skipcur && cs == curr_clocksource)
			continue;
		if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
			continue;
		return cs;
	}
	return NULL;
}

static void __clocksource_select(bool skipcur)
{
	bool oneshot = tick_oneshot_mode_active();
	struct clocksource *best, *cs;

	/* Find the best suitable clocksource */
	best = clocksource_find_best(oneshot, skipcur);
	if (!best)
		return;

	/* Check for the override clocksource. */
	list_for_each_entry(cs, &clocksource_list, list) {
		if (skipcur && cs == curr_clocksource)
			continue;
		if (strcmp(cs->name, override_name) != 0)
			continue;
		/*
		 * Check to make sure we don't switch to a non-highres
		 * capable clocksource if the tick code is in oneshot
		 * mode (highres or nohz)
		 */
		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
			/* Override clocksource cannot be used. */
			printk(KERN_WARNING "Override clocksource %s is not "
			       "HRT compatible. Cannot switch while in "
			       "HRT/NOHZ mode\n", cs->name);
			override_name[0] = 0;
		} else
			/* Override clocksource can be used. */
			best = cs;
		break;
	}

	if (curr_clocksource != best && !timekeeping_notify(best)) {
		pr_info("Switched to clocksource %s\n", best->name);
		curr_clocksource = best;
	}
}

/**
 * clocksource_select - Select the best clocksource available
 *
 * Private function. Must hold clocksource_mutex when called.
 *
 * Select the clocksource with the best rating, or the clocksource
 * which is selected by userspace override.
 */
static void clocksource_select(void)
{
	return __clocksource_select(false);
}

static void clocksource_select_fallback(void)
{
	return __clocksource_select(true);
}

#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */

static inline void clocksource_select(void) { }
static inline void clocksource_select_fallback(void) { }

#endif

/*
 * clocksource_done_booting - Called near the end of core bootup
 *
 * Hack to avoid lots of clocksource churn at boot time.
 * We use fs_initcall because we want this to start before
 * device_initcall but after subsys_initcall.
 */
static int __init clocksource_done_booting(void)
{
	mutex_lock(&clocksource_mutex);
	curr_clocksource = clocksource_default_clock();
	finished_booting = 1;
	/*
	 * Run the watchdog first to eliminate unstable clock sources
	 */
	__clocksource_watchdog_kthread();
	clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}
fs_initcall(clocksource_done_booting);

/*
 * Enqueue the clocksource sorted by rating
 */
static void clocksource_enqueue(struct clocksource *cs)
{
	struct list_head *entry = &clocksource_list;
	struct clocksource *tmp;

	list_for_each_entry(tmp, &clocksource_list, list)
		/* Keep track of the place where to insert */
		if (tmp->rating >= cs->rating)
			entry = &tmp->list;
	list_add(&cs->list, entry);
}
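/*
 * Note on ratings (illustrative): the rating driving this ordering follows
 * the convention documented in <linux/clocksource.h>, roughly 1-99 unfit
 * for real use, 100-199 base level, 200-299 good, 300-399 desired and
 * 400-499 perfect, with the jiffies fallback sitting at the very bottom.
 */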
/**
 * __clocksource_updatefreq_scale - Used to update clocksource with new freq
 * @cs:		clocksource to be registered
 * @scale:	Scale factor multiplied against freq to get clocksource hz
 * @freq:	clocksource frequency (cycles per second) divided by scale
 *
 * This should only be called from the clocksource->enable() method.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions.
 */
void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
{
	u64 sec;
	/*
	 * Calc the maximum number of seconds which we can run before
	 * wrapping around. For clocksources which have a mask > 32bit
	 * we need to limit the max sleep time to have a good
	 * conversion precision. 10 minutes is still a reasonable
	 * amount. That results in a shift value of 24 for a
	 * clocksource with mask >= 40bit and f >= 4GHz. That maps to
	 * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
	 * margin as we do in clocksource_max_deferment()
	 */
	sec = (cs->mask - (cs->mask >> 3));
	do_div(sec, freq);
	do_div(sec, scale);
	if (!sec)
		sec = 1;
	else if (sec > 600 && cs->mask > UINT_MAX)
		sec = 600;

	clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
			       NSEC_PER_SEC / scale, sec * scale);

	/*
	 * For clocksources that have large mults, reduce mult/shift to
	 * avoid overflow. Since mult may be adjusted by ntp, add an
	 * extra safety margin.
	 */
	cs->maxadj = clocksource_max_adjustment(cs);
	while ((cs->mult + cs->maxadj < cs->mult)
		|| (cs->mult - cs->maxadj > cs->mult)) {
		cs->mult >>= 1;
		cs->shift--;
		cs->maxadj = clocksource_max_adjustment(cs);
	}

	cs->max_idle_ns = clocksource_max_deferment(cs);
}
EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);

/**
 * __clocksource_register_scale - Used to install new clocksources
 * @cs:		clocksource to be registered
 * @scale:	Scale factor multiplied against freq to get clocksource hz
 * @freq:	clocksource frequency (cycles per second) divided by scale
 *
 * Returns -EBUSY if registration fails, zero otherwise.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * clocksource_register_hz() or clocksource_register_khz helper functions.
 */
int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{

	/* Initialize mult/shift and max_idle_ns */
	__clocksource_updatefreq_scale(cs, scale, freq);

	/* Add clocksource to the clocksource list */
	mutex_lock(&clocksource_mutex);
	clocksource_enqueue(cs);
	clocksource_enqueue_watchdog(cs);
	clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(__clocksource_register_scale);


/**
 * clocksource_register - Used to install new clocksources
 * @cs:		clocksource to be registered
 *
 * Returns -EBUSY if registration fails, zero otherwise.
 */
int clocksource_register(struct clocksource *cs)
{
	/* calculate max adjustment for given mult/shift */
	cs->maxadj = clocksource_max_adjustment(cs);
	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
		"Clocksource %s might overflow on 11%% adjustment\n",
		cs->name);

	/* calculate max idle time permitted for this clocksource */
	cs->max_idle_ns = clocksource_max_deferment(cs);

	mutex_lock(&clocksource_mutex);
	clocksource_enqueue(cs);
	clocksource_enqueue_watchdog(cs);
	clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}
EXPORT_SYMBOL(clocksource_register);
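/*
 * Registration sketch (illustrative): most drivers use the
 * clocksource_register_hz()/clocksource_register_khz() helpers, which wrap
 * __clocksource_register_scale() above and derive mult/shift from the
 * counter frequency. All "foo_" names below are hypothetical.
 *
 *	static cycle_t foo_cs_read(struct clocksource *cs)
 *	{
 *		return (cycle_t)readl(foo_counter_base);
 *	}
 *
 *	static struct clocksource foo_cs = {
 *		.name	= "foo-timer",
 *		.rating	= 200,
 *		.read	= foo_cs_read,
 *		.mask	= CLOCKSOURCE_MASK(32),
 *		.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 *	};
 *
 *	clocksource_register_hz(&foo_cs, 10000000);	(10 MHz counter)
 */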
static void __clocksource_change_rating(struct clocksource *cs, int rating)
{
	list_del(&cs->list);
	cs->rating = rating;
	clocksource_enqueue(cs);
}

/**
 * clocksource_change_rating - Change the rating of a registered clocksource
 * @cs:		clocksource to be changed
 * @rating:	new rating
 */
void clocksource_change_rating(struct clocksource *cs, int rating)
{
	mutex_lock(&clocksource_mutex);
	__clocksource_change_rating(cs, rating);
	clocksource_select();
	mutex_unlock(&clocksource_mutex);
}
EXPORT_SYMBOL(clocksource_change_rating);

/*
 * Unbind clocksource @cs. Called with clocksource_mutex held.
 */
static int clocksource_unbind(struct clocksource *cs)
{
	/*
	 * I really can't convince myself to support this on hardware
	 * designed by lobotomized monkeys.
	 */
	if (clocksource_is_watchdog(cs))
		return -EBUSY;

	if (cs == curr_clocksource) {
		/* Select and try to install a replacement clock source */
		clocksource_select_fallback();
		if (curr_clocksource == cs)
			return -EBUSY;
	}
	clocksource_dequeue_watchdog(cs);
	list_del_init(&cs->list);
	return 0;
}

/**
 * clocksource_unregister - remove a registered clocksource
 * @cs:	clocksource to be unregistered
 */
int clocksource_unregister(struct clocksource *cs)
{
	int ret = 0;

	mutex_lock(&clocksource_mutex);
	if (!list_empty(&cs->list))
		ret = clocksource_unbind(cs);
	mutex_unlock(&clocksource_mutex);
	return ret;
}
EXPORT_SYMBOL(clocksource_unregister);

#ifdef CONFIG_SYSFS
/**
 * sysfs_show_current_clocksources - sysfs interface for current clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	char buffer to be filled with clocksource list
 *
 * Provides sysfs interface for listing current clocksource.
 */
static ssize_t
sysfs_show_current_clocksources(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	ssize_t count = 0;

	mutex_lock(&clocksource_mutex);
	count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
	mutex_unlock(&clocksource_mutex);

	return count;
}

size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
{
	size_t ret = cnt;

	/* strings from sysfs write are not 0 terminated! */
	if (!cnt || cnt >= CS_NAME_LEN)
		return -EINVAL;

	/* strip off \n: */
	if (buf[cnt-1] == '\n')
		cnt--;
	if (cnt > 0)
		memcpy(dst, buf, cnt);
	dst[cnt] = 0;
	return ret;
}

/**
 * sysfs_override_clocksource - interface for manually overriding clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	name of override clocksource
 * @count:	length of buffer
 *
 * Takes input from sysfs interface for manually overriding the default
 * clocksource selection.
 */
static ssize_t sysfs_override_clocksource(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t count)
{
	size_t ret;

	mutex_lock(&clocksource_mutex);

	ret = sysfs_get_uname(buf, override_name, count);
	if (ret >= 0)
		clocksource_select();

	mutex_unlock(&clocksource_mutex);

	return ret;
}

/**
 * sysfs_unbind_clocksource - interface for manually unbinding clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	unused
 * @count:	length of buffer
 *
 * Takes input from sysfs interface for manually unbinding a clocksource.
 */
static ssize_t sysfs_unbind_clocksource(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t count)
{
	struct clocksource *cs;
	char name[CS_NAME_LEN];
	size_t ret;

	ret = sysfs_get_uname(buf, name, count);
	if (ret < 0)
		return ret;

	ret = -ENODEV;
	mutex_lock(&clocksource_mutex);
	list_for_each_entry(cs, &clocksource_list, list) {
		if (strcmp(cs->name, name))
			continue;
		ret = clocksource_unbind(cs);
		break;
	}
	mutex_unlock(&clocksource_mutex);

	return ret ? ret : count;
}
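/*
 * Userspace view of the attributes in this section (illustrative),
 * assuming the usual sysfs layout:
 *
 *	# cat /sys/devices/system/clocksource/clocksource0/current_clocksource
 *	# cat /sys/devices/system/clocksource/clocksource0/available_clocksource
 *	# echo hpet > /sys/devices/system/clocksource/clocksource0/current_clocksource
 *	# echo acpi_pm > /sys/devices/system/clocksource/clocksource0/unbind_clocksource
 */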
/**
 * sysfs_show_available_clocksources - sysfs interface for listing clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	char buffer to be filled with clocksource list
 *
 * Provides sysfs interface for listing registered clocksources
 */
static ssize_t
sysfs_show_available_clocksources(struct device *dev,
				  struct device_attribute *attr,
				  char *buf)
{
	struct clocksource *src;
	ssize_t count = 0;

	mutex_lock(&clocksource_mutex);
	list_for_each_entry(src, &clocksource_list, list) {
		/*
		 * Don't show non-HRES clocksource if the tick code is
		 * in one shot mode (highres=on or nohz=on)
		 */
		if (!tick_oneshot_mode_active() ||
		    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
			count += snprintf(buf + count,
				  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
				  "%s ", src->name);
	}
	mutex_unlock(&clocksource_mutex);

	count += snprintf(buf + count,
			  max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");

	return count;
}

/*
 * Sysfs setup bits:
 */
static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
		   sysfs_override_clocksource);

static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource);

static DEVICE_ATTR(available_clocksource, 0444,
		   sysfs_show_available_clocksources, NULL);

static struct bus_type clocksource_subsys = {
	.name = "clocksource",
	.dev_name = "clocksource",
};

static struct device device_clocksource = {
	.id	= 0,
	.bus	= &clocksource_subsys,
};

static int __init init_clocksource_sysfs(void)
{
	int error = subsys_system_register(&clocksource_subsys, NULL);

	if (!error)
		error = device_register(&device_clocksource);
	if (!error)
		error = device_create_file(
				&device_clocksource,
				&dev_attr_current_clocksource);
	if (!error)
		error = device_create_file(&device_clocksource,
					   &dev_attr_unbind_clocksource);
	if (!error)
		error = device_create_file(
				&device_clocksource,
				&dev_attr_available_clocksource);
	return error;
}

device_initcall(init_clocksource_sysfs);
#endif /* CONFIG_SYSFS */

/**
 * boot_override_clocksource - boot clock override
 * @str:	override name
 *
 * Takes a clocksource= boot argument and uses it
 * as the clocksource override name.
 */
static int __init boot_override_clocksource(char* str)
{
	mutex_lock(&clocksource_mutex);
	if (str)
		strlcpy(override_name, str, sizeof(override_name));
	mutex_unlock(&clocksource_mutex);
	return 1;
}

__setup("clocksource=", boot_override_clocksource);

/**
 * boot_override_clock - Compatibility layer for deprecated boot option
 * @str:	override name
 *
 * DEPRECATED! Takes a clock= boot argument and uses it
 * as the clocksource override name
 */
static int __init boot_override_clock(char* str)
{
	if (!strcmp(str, "pmtmr")) {
		printk("Warning: clock=pmtmr is deprecated. "
			"Use clocksource=acpi_pm.\n");
		return boot_override_clocksource("acpi_pm");
	}
	printk("Warning! clock= boot option is deprecated. "
		"Use clocksource=xyz\n");
	return boot_override_clocksource(str);
}

__setup("clock=", boot_override_clock);
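/*
 * Command line usage (illustrative): the override handled above can also be
 * set at boot, e.g. "clocksource=hpet" on x86; the older "clock=" form is
 * still accepted but warned about as deprecated.
 */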