xref: /linux/kernel/time/clocksource.c (revision ff5599816711d2e67da2d7561fd36ac48debd433)
1 /*
2  * linux/kernel/time/clocksource.c
3  *
4  * This file contains the functions which manage clocksource drivers.
5  *
6  * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21  *
22  * TODO WishList:
23  *   o Allow clocksource drivers to be unregistered
24  */
25 
26 #include <linux/device.h>
27 #include <linux/clocksource.h>
28 #include <linux/init.h>
29 #include <linux/module.h>
30 #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
31 #include <linux/tick.h>
32 #include <linux/kthread.h>
33 
34 #include "tick-internal.h"
35 
36 void timecounter_init(struct timecounter *tc,
37 		      const struct cyclecounter *cc,
38 		      u64 start_tstamp)
39 {
40 	tc->cc = cc;
41 	tc->cycle_last = cc->read(cc);
42 	tc->nsec = start_tstamp;
43 }
44 EXPORT_SYMBOL_GPL(timecounter_init);
45 
46 /**
47  * timecounter_read_delta - get nanoseconds since last call of this function
48  * @tc:         Pointer to time counter
49  *
50  * When the underlying cycle counter runs over, this will be handled
51  * correctly as long as it does not run over more than once between
52  * calls.
53  *
54  * The first call to this function for a new time counter initializes
55  * the time tracking and returns an undefined result.
56  */
57 static u64 timecounter_read_delta(struct timecounter *tc)
58 {
59 	cycle_t cycle_now, cycle_delta;
60 	u64 ns_offset;
61 
62 	/* read cycle counter: */
63 	cycle_now = tc->cc->read(tc->cc);
64 
65 	/* calculate the delta since the last timecounter_read_delta(): */
66 	cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask;
67 
68 	/* convert to nanoseconds: */
69 	ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta);
70 
71 	/* update time stamp of timecounter_read_delta() call: */
72 	tc->cycle_last = cycle_now;
73 
74 	return ns_offset;
75 }
76 
77 u64 timecounter_read(struct timecounter *tc)
78 {
79 	u64 nsec;
80 
81 	/* increment time by nanoseconds since last call */
82 	nsec = timecounter_read_delta(tc);
83 	nsec += tc->nsec;
84 	tc->nsec = nsec;
85 
86 	return nsec;
87 }
88 EXPORT_SYMBOL_GPL(timecounter_read);
89 
90 u64 timecounter_cyc2time(struct timecounter *tc,
91 			 cycle_t cycle_tstamp)
92 {
93 	u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask;
94 	u64 nsec;
95 
96 	/*
97 	 * Instead of always treating cycle_tstamp as more recent
98 	 * than tc->cycle_last, detect when it is too far in the
99 	 * future and treat it as old time stamp instead.
100 	 */
101 	if (cycle_delta > tc->cc->mask / 2) {
102 		cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask;
103 		nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta);
104 	} else {
105 		nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec;
106 	}
107 
108 	return nsec;
109 }
110 EXPORT_SYMBOL_GPL(timecounter_cyc2time);
111 
112 /**
113  * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
114  * @mult:	pointer to mult variable
115  * @shift:	pointer to shift variable
116  * @from:	frequency to convert from
117  * @to:		frequency to convert to
118  * @maxsec:	guaranteed runtime conversion range in seconds
119  *
120  * The function evaluates the shift/mult pair for the scaled math
121  * operations of clocksources and clockevents.
122  *
123  * @to and @from are frequency values in HZ. For clock sources @to is
124  * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
125  * event @to is the counter frequency and @from is NSEC_PER_SEC.
126  *
127  * The @maxsec conversion range argument controls the time frame in
128  * seconds which must be covered by the runtime conversion with the
129  * calculated mult and shift factors. This guarantees that no 64bit
130  * overflow happens when the input value of the conversion is
131  * multiplied with the calculated mult factor. Larger ranges may
132  * reduce the conversion accuracy by chosing smaller mult and shift
133  * factors.
134  */
135 void
136 clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
137 {
138 	u64 tmp;
139 	u32 sft, sftacc= 32;
140 
141 	/*
142 	 * Calculate the shift factor which is limiting the conversion
143 	 * range:
144 	 */
145 	tmp = ((u64)maxsec * from) >> 32;
146 	while (tmp) {
147 		tmp >>=1;
148 		sftacc--;
149 	}
150 
151 	/*
152 	 * Find the conversion shift/mult pair which has the best
153 	 * accuracy and fits the maxsec conversion range:
154 	 */
155 	for (sft = 32; sft > 0; sft--) {
156 		tmp = (u64) to << sft;
157 		tmp += from / 2;
158 		do_div(tmp, from);
159 		if ((tmp >> sftacc) == 0)
160 			break;
161 	}
162 	*mult = tmp;
163 	*shift = sft;
164 }
165 
166 /*[Clocksource internal variables]---------
167  * curr_clocksource:
168  *	currently selected clocksource.
169  * clocksource_list:
170  *	linked list with the registered clocksources
171  * clocksource_mutex:
172  *	protects manipulations to curr_clocksource and the clocksource_list
173  * override_name:
174  *	Name of the user-specified clocksource.
175  */
176 static struct clocksource *curr_clocksource;
177 static LIST_HEAD(clocksource_list);
178 static DEFINE_MUTEX(clocksource_mutex);
179 static char override_name[CS_NAME_LEN];
180 static int finished_booting;
181 
182 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
183 static void clocksource_watchdog_work(struct work_struct *work);
184 static void clocksource_select(void);
185 
186 static LIST_HEAD(watchdog_list);
187 static struct clocksource *watchdog;
188 static struct timer_list watchdog_timer;
189 static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
190 static DEFINE_SPINLOCK(watchdog_lock);
191 static int watchdog_running;
192 static atomic_t watchdog_reset_pending;
193 
194 static int clocksource_watchdog_kthread(void *data);
195 static void __clocksource_change_rating(struct clocksource *cs, int rating);
196 
197 /*
198  * Interval: 0.5sec Threshold: 0.0625s
199  */
200 #define WATCHDOG_INTERVAL (HZ >> 1)
201 #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
202 
203 static void clocksource_watchdog_work(struct work_struct *work)
204 {
205 	/*
206 	 * If kthread_run fails the next watchdog scan over the
207 	 * watchdog_list will find the unstable clock again.
208 	 */
209 	kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
210 }
211 
212 static void __clocksource_unstable(struct clocksource *cs)
213 {
214 	cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
215 	cs->flags |= CLOCK_SOURCE_UNSTABLE;
216 	if (finished_booting)
217 		schedule_work(&watchdog_work);
218 }
219 
220 static void clocksource_unstable(struct clocksource *cs, int64_t delta)
221 {
222 	printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
223 	       cs->name, delta);
224 	__clocksource_unstable(cs);
225 }
226 
227 /**
228  * clocksource_mark_unstable - mark clocksource unstable via watchdog
229  * @cs:		clocksource to be marked unstable
230  *
231  * This function is called instead of clocksource_change_rating from
232  * cpu hotplug code to avoid a deadlock between the clocksource mutex
233  * and the cpu hotplug mutex. It defers the update of the clocksource
234  * to the watchdog thread.
235  */
236 void clocksource_mark_unstable(struct clocksource *cs)
237 {
238 	unsigned long flags;
239 
240 	spin_lock_irqsave(&watchdog_lock, flags);
241 	if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
242 		if (list_empty(&cs->wd_list))
243 			list_add(&cs->wd_list, &watchdog_list);
244 		__clocksource_unstable(cs);
245 	}
246 	spin_unlock_irqrestore(&watchdog_lock, flags);
247 }
248 
249 static void clocksource_watchdog(unsigned long data)
250 {
251 	struct clocksource *cs;
252 	cycle_t csnow, wdnow;
253 	int64_t wd_nsec, cs_nsec;
254 	int next_cpu, reset_pending;
255 
256 	spin_lock(&watchdog_lock);
257 	if (!watchdog_running)
258 		goto out;
259 
260 	reset_pending = atomic_read(&watchdog_reset_pending);
261 
262 	list_for_each_entry(cs, &watchdog_list, wd_list) {
263 
264 		/* Clocksource already marked unstable? */
265 		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
266 			if (finished_booting)
267 				schedule_work(&watchdog_work);
268 			continue;
269 		}
270 
271 		local_irq_disable();
272 		csnow = cs->read(cs);
273 		wdnow = watchdog->read(watchdog);
274 		local_irq_enable();
275 
276 		/* Clocksource initialized ? */
277 		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
278 		    atomic_read(&watchdog_reset_pending)) {
279 			cs->flags |= CLOCK_SOURCE_WATCHDOG;
280 			cs->wd_last = wdnow;
281 			cs->cs_last = csnow;
282 			continue;
283 		}
284 
285 		wd_nsec = clocksource_cyc2ns((wdnow - cs->wd_last) & watchdog->mask,
286 					     watchdog->mult, watchdog->shift);
287 
288 		cs_nsec = clocksource_cyc2ns((csnow - cs->cs_last) &
289 					     cs->mask, cs->mult, cs->shift);
290 		cs->cs_last = csnow;
291 		cs->wd_last = wdnow;
292 
293 		if (atomic_read(&watchdog_reset_pending))
294 			continue;
295 
296 		/* Check the deviation from the watchdog clocksource. */
297 		if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
298 			clocksource_unstable(cs, cs_nsec - wd_nsec);
299 			continue;
300 		}
301 
302 		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
303 		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
304 		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
305 			/* Mark it valid for high-res. */
306 			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
307 
308 			/*
309 			 * clocksource_done_booting() will sort it if
310 			 * finished_booting is not set yet.
311 			 */
312 			if (!finished_booting)
313 				continue;
314 
315 			/*
316 			 * If this is not the current clocksource let
317 			 * the watchdog thread reselect it. Due to the
318 			 * change to high res this clocksource might
319 			 * be preferred now. If it is the current
320 			 * clocksource let the tick code know about
321 			 * that change.
322 			 */
323 			if (cs != curr_clocksource) {
324 				cs->flags |= CLOCK_SOURCE_RESELECT;
325 				schedule_work(&watchdog_work);
326 			} else {
327 				tick_clock_notify();
328 			}
329 		}
330 	}
331 
332 	/*
333 	 * We only clear the watchdog_reset_pending, when we did a
334 	 * full cycle through all clocksources.
335 	 */
336 	if (reset_pending)
337 		atomic_dec(&watchdog_reset_pending);
338 
339 	/*
340 	 * Cycle through CPUs to check if the CPUs stay synchronized
341 	 * to each other.
342 	 */
343 	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
344 	if (next_cpu >= nr_cpu_ids)
345 		next_cpu = cpumask_first(cpu_online_mask);
346 	watchdog_timer.expires += WATCHDOG_INTERVAL;
347 	add_timer_on(&watchdog_timer, next_cpu);
348 out:
349 	spin_unlock(&watchdog_lock);
350 }
351 
352 static inline void clocksource_start_watchdog(void)
353 {
354 	if (watchdog_running || !watchdog || list_empty(&watchdog_list))
355 		return;
356 	init_timer(&watchdog_timer);
357 	watchdog_timer.function = clocksource_watchdog;
358 	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
359 	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
360 	watchdog_running = 1;
361 }
362 
363 static inline void clocksource_stop_watchdog(void)
364 {
365 	if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
366 		return;
367 	del_timer(&watchdog_timer);
368 	watchdog_running = 0;
369 }
370 
371 static inline void clocksource_reset_watchdog(void)
372 {
373 	struct clocksource *cs;
374 
375 	list_for_each_entry(cs, &watchdog_list, wd_list)
376 		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
377 }
378 
379 static void clocksource_resume_watchdog(void)
380 {
381 	atomic_inc(&watchdog_reset_pending);
382 }
383 
384 static void clocksource_enqueue_watchdog(struct clocksource *cs)
385 {
386 	unsigned long flags;
387 
388 	spin_lock_irqsave(&watchdog_lock, flags);
389 	if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
390 		/* cs is a clocksource to be watched. */
391 		list_add(&cs->wd_list, &watchdog_list);
392 		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
393 	} else {
394 		/* cs is a watchdog. */
395 		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
396 			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
397 		/* Pick the best watchdog. */
398 		if (!watchdog || cs->rating > watchdog->rating) {
399 			watchdog = cs;
400 			/* Reset watchdog cycles */
401 			clocksource_reset_watchdog();
402 		}
403 	}
404 	/* Check if the watchdog timer needs to be started. */
405 	clocksource_start_watchdog();
406 	spin_unlock_irqrestore(&watchdog_lock, flags);
407 }
408 
409 static void clocksource_dequeue_watchdog(struct clocksource *cs)
410 {
411 	unsigned long flags;
412 
413 	spin_lock_irqsave(&watchdog_lock, flags);
414 	if (cs != watchdog) {
415 		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
416 			/* cs is a watched clocksource. */
417 			list_del_init(&cs->wd_list);
418 			/* Check if the watchdog timer needs to be stopped. */
419 			clocksource_stop_watchdog();
420 		}
421 	}
422 	spin_unlock_irqrestore(&watchdog_lock, flags);
423 }
424 
425 static int __clocksource_watchdog_kthread(void)
426 {
427 	struct clocksource *cs, *tmp;
428 	unsigned long flags;
429 	LIST_HEAD(unstable);
430 	int select = 0;
431 
432 	spin_lock_irqsave(&watchdog_lock, flags);
433 	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
434 		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
435 			list_del_init(&cs->wd_list);
436 			list_add(&cs->wd_list, &unstable);
437 			select = 1;
438 		}
439 		if (cs->flags & CLOCK_SOURCE_RESELECT) {
440 			cs->flags &= ~CLOCK_SOURCE_RESELECT;
441 			select = 1;
442 		}
443 	}
444 	/* Check if the watchdog timer needs to be stopped. */
445 	clocksource_stop_watchdog();
446 	spin_unlock_irqrestore(&watchdog_lock, flags);
447 
448 	/* Needs to be done outside of watchdog lock */
449 	list_for_each_entry_safe(cs, tmp, &unstable, wd_list) {
450 		list_del_init(&cs->wd_list);
451 		__clocksource_change_rating(cs, 0);
452 	}
453 	return select;
454 }
455 
456 static int clocksource_watchdog_kthread(void *data)
457 {
458 	mutex_lock(&clocksource_mutex);
459 	if (__clocksource_watchdog_kthread())
460 		clocksource_select();
461 	mutex_unlock(&clocksource_mutex);
462 	return 0;
463 }
464 
465 static bool clocksource_is_watchdog(struct clocksource *cs)
466 {
467 	return cs == watchdog;
468 }
469 
470 #else /* CONFIG_CLOCKSOURCE_WATCHDOG */
471 
472 static void clocksource_enqueue_watchdog(struct clocksource *cs)
473 {
474 	if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
475 		cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
476 }
477 
478 static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
479 static inline void clocksource_resume_watchdog(void) { }
480 static inline int __clocksource_watchdog_kthread(void) { return 0; }
481 static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
482 
483 #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
484 
485 /**
486  * clocksource_suspend - suspend the clocksource(s)
487  */
488 void clocksource_suspend(void)
489 {
490 	struct clocksource *cs;
491 
492 	list_for_each_entry_reverse(cs, &clocksource_list, list)
493 		if (cs->suspend)
494 			cs->suspend(cs);
495 }
496 
497 /**
498  * clocksource_resume - resume the clocksource(s)
499  */
500 void clocksource_resume(void)
501 {
502 	struct clocksource *cs;
503 
504 	list_for_each_entry(cs, &clocksource_list, list)
505 		if (cs->resume)
506 			cs->resume(cs);
507 
508 	clocksource_resume_watchdog();
509 }
510 
/**
 * clocksource_touch_watchdog - Update watchdog
 *
 * Requests a watchdog reset after exception contexts such as kgdb, so
 * that the time spent there does not incorrectly trip the watchdog.
 * This might fail when the kernel was stopped in code which holds
 * watchdog_lock.
 */
void clocksource_touch_watchdog(void)
{
	clocksource_resume_watchdog();
}
522 
523 /**
524  * clocksource_max_adjustment- Returns max adjustment amount
525  * @cs:         Pointer to clocksource
526  *
527  */
528 static u32 clocksource_max_adjustment(struct clocksource *cs)
529 {
530 	u64 ret;
531 	/*
532 	 * We won't try to correct for more than 11% adjustments (110,000 ppm),
533 	 */
534 	ret = (u64)cs->mult * 11;
535 	do_div(ret,100);
536 	return (u32)ret;
537 }
538 
539 /**
540  * clocksource_max_deferment - Returns max time the clocksource can be deferred
541  * @cs:         Pointer to clocksource
542  *
543  */
544 static u64 clocksource_max_deferment(struct clocksource *cs)
545 {
546 	u64 max_nsecs, max_cycles;
547 
548 	/*
549 	 * Calculate the maximum number of cycles that we can pass to the
550 	 * cyc2ns function without overflowing a 64-bit signed result. The
551 	 * maximum number of cycles is equal to ULLONG_MAX/(cs->mult+cs->maxadj)
552 	 * which is equivalent to the below.
553 	 * max_cycles < (2^63)/(cs->mult + cs->maxadj)
554 	 * max_cycles < 2^(log2((2^63)/(cs->mult + cs->maxadj)))
555 	 * max_cycles < 2^(log2(2^63) - log2(cs->mult + cs->maxadj))
556 	 * max_cycles < 2^(63 - log2(cs->mult + cs->maxadj))
557 	 * max_cycles < 1 << (63 - log2(cs->mult + cs->maxadj))
558 	 * Please note that we add 1 to the result of the log2 to account for
559 	 * any rounding errors, ensure the above inequality is satisfied and
560 	 * no overflow will occur.
561 	 */
562 	max_cycles = 1ULL << (63 - (ilog2(cs->mult + cs->maxadj) + 1));
563 
564 	/*
565 	 * The actual maximum number of cycles we can defer the clocksource is
566 	 * determined by the minimum of max_cycles and cs->mask.
567 	 * Note: Here we subtract the maxadj to make sure we don't sleep for
568 	 * too long if there's a large negative adjustment.
569 	 */
570 	max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
571 	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult - cs->maxadj,
572 					cs->shift);
573 
574 	/*
575 	 * To ensure that the clocksource does not wrap whilst we are idle,
576 	 * limit the time the clocksource can be deferred by 12.5%. Please
577 	 * note a margin of 12.5% is used because this can be computed with
578 	 * a shift, versus say 10% which would require division.
579 	 */
580 	return max_nsecs - (max_nsecs >> 3);
581 }
582 
583 #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
584 
585 static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
586 {
587 	struct clocksource *cs;
588 
589 	if (!finished_booting || list_empty(&clocksource_list))
590 		return NULL;
591 
592 	/*
593 	 * We pick the clocksource with the highest rating. If oneshot
594 	 * mode is active, we pick the highres valid clocksource with
595 	 * the best rating.
596 	 */
597 	list_for_each_entry(cs, &clocksource_list, list) {
598 		if (skipcur && cs == curr_clocksource)
599 			continue;
600 		if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
601 			continue;
602 		return cs;
603 	}
604 	return NULL;
605 }
606 
607 static void __clocksource_select(bool skipcur)
608 {
609 	bool oneshot = tick_oneshot_mode_active();
610 	struct clocksource *best, *cs;
611 
612 	/* Find the best suitable clocksource */
613 	best = clocksource_find_best(oneshot, skipcur);
614 	if (!best)
615 		return;
616 
617 	/* Check for the override clocksource. */
618 	list_for_each_entry(cs, &clocksource_list, list) {
619 		if (skipcur && cs == curr_clocksource)
620 			continue;
621 		if (strcmp(cs->name, override_name) != 0)
622 			continue;
623 		/*
624 		 * Check to make sure we don't switch to a non-highres
625 		 * capable clocksource if the tick code is in oneshot
626 		 * mode (highres or nohz)
627 		 */
628 		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
629 			/* Override clocksource cannot be used. */
630 			printk(KERN_WARNING "Override clocksource %s is not "
631 			       "HRT compatible. Cannot switch while in "
632 			       "HRT/NOHZ mode\n", cs->name);
633 			override_name[0] = 0;
634 		} else
635 			/* Override clocksource can be used. */
636 			best = cs;
637 		break;
638 	}
639 
640 	if (curr_clocksource != best && !timekeeping_notify(best)) {
641 		pr_info("Switched to clocksource %s\n", best->name);
642 		curr_clocksource = best;
643 	}
644 }
645 
646 /**
647  * clocksource_select - Select the best clocksource available
648  *
649  * Private function. Must hold clocksource_mutex when called.
650  *
651  * Select the clocksource with the best rating, or the clocksource,
652  * which is selected by userspace override.
653  */
654 static void clocksource_select(void)
655 {
656 	return __clocksource_select(false);
657 }
658 
659 static void clocksource_select_fallback(void)
660 {
661 	return __clocksource_select(true);
662 }
663 
664 #else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
665 
/* No clocksource selection when CONFIG_ARCH_USES_GETTIMEOFFSET is set. */
static inline void clocksource_select(void)
{
}

static inline void clocksource_select_fallback(void)
{
}
668 
669 #endif
670 
671 /*
672  * clocksource_done_booting - Called near the end of core bootup
673  *
674  * Hack to avoid lots of clocksource churn at boot time.
675  * We use fs_initcall because we want this to start before
676  * device_initcall but after subsys_initcall.
677  */
678 static int __init clocksource_done_booting(void)
679 {
680 	mutex_lock(&clocksource_mutex);
681 	curr_clocksource = clocksource_default_clock();
682 	finished_booting = 1;
683 	/*
684 	 * Run the watchdog first to eliminate unstable clock sources
685 	 */
686 	__clocksource_watchdog_kthread();
687 	clocksource_select();
688 	mutex_unlock(&clocksource_mutex);
689 	return 0;
690 }
691 fs_initcall(clocksource_done_booting);
692 
693 /*
694  * Enqueue the clocksource sorted by rating
695  */
696 static void clocksource_enqueue(struct clocksource *cs)
697 {
698 	struct list_head *entry = &clocksource_list;
699 	struct clocksource *tmp;
700 
701 	list_for_each_entry(tmp, &clocksource_list, list)
702 		/* Keep track of the place, where to insert */
703 		if (tmp->rating >= cs->rating)
704 			entry = &tmp->list;
705 	list_add(&cs->list, entry);
706 }
707 
708 /**
709  * __clocksource_updatefreq_scale - Used update clocksource with new freq
710  * @cs:		clocksource to be registered
711  * @scale:	Scale factor multiplied against freq to get clocksource hz
712  * @freq:	clocksource frequency (cycles per second) divided by scale
713  *
714  * This should only be called from the clocksource->enable() method.
715  *
716  * This *SHOULD NOT* be called directly! Please use the
717  * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions.
718  */
719 void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
720 {
721 	u64 sec;
722 	/*
723 	 * Calc the maximum number of seconds which we can run before
724 	 * wrapping around. For clocksources which have a mask > 32bit
725 	 * we need to limit the max sleep time to have a good
726 	 * conversion precision. 10 minutes is still a reasonable
727 	 * amount. That results in a shift value of 24 for a
728 	 * clocksource with mask >= 40bit and f >= 4GHz. That maps to
729 	 * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
730 	 * margin as we do in clocksource_max_deferment()
731 	 */
732 	sec = (cs->mask - (cs->mask >> 3));
733 	do_div(sec, freq);
734 	do_div(sec, scale);
735 	if (!sec)
736 		sec = 1;
737 	else if (sec > 600 && cs->mask > UINT_MAX)
738 		sec = 600;
739 
740 	clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
741 			       NSEC_PER_SEC / scale, sec * scale);
742 
743 	/*
744 	 * for clocksources that have large mults, to avoid overflow.
745 	 * Since mult may be adjusted by ntp, add an safety extra margin
746 	 *
747 	 */
748 	cs->maxadj = clocksource_max_adjustment(cs);
749 	while ((cs->mult + cs->maxadj < cs->mult)
750 		|| (cs->mult - cs->maxadj > cs->mult)) {
751 		cs->mult >>= 1;
752 		cs->shift--;
753 		cs->maxadj = clocksource_max_adjustment(cs);
754 	}
755 
756 	cs->max_idle_ns = clocksource_max_deferment(cs);
757 }
758 EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
759 
760 /**
761  * __clocksource_register_scale - Used to install new clocksources
762  * @cs:		clocksource to be registered
763  * @scale:	Scale factor multiplied against freq to get clocksource hz
764  * @freq:	clocksource frequency (cycles per second) divided by scale
765  *
766  * Returns -EBUSY if registration fails, zero otherwise.
767  *
768  * This *SHOULD NOT* be called directly! Please use the
769  * clocksource_register_hz() or clocksource_register_khz helper functions.
770  */
771 int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
772 {
773 
774 	/* Initialize mult/shift and max_idle_ns */
775 	__clocksource_updatefreq_scale(cs, scale, freq);
776 
777 	/* Add clocksource to the clcoksource list */
778 	mutex_lock(&clocksource_mutex);
779 	clocksource_enqueue(cs);
780 	clocksource_enqueue_watchdog(cs);
781 	clocksource_select();
782 	mutex_unlock(&clocksource_mutex);
783 	return 0;
784 }
785 EXPORT_SYMBOL_GPL(__clocksource_register_scale);
786 
787 
788 /**
789  * clocksource_register - Used to install new clocksources
790  * @cs:		clocksource to be registered
791  *
792  * Returns -EBUSY if registration fails, zero otherwise.
793  */
794 int clocksource_register(struct clocksource *cs)
795 {
796 	/* calculate max adjustment for given mult/shift */
797 	cs->maxadj = clocksource_max_adjustment(cs);
798 	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
799 		"Clocksource %s might overflow on 11%% adjustment\n",
800 		cs->name);
801 
802 	/* calculate max idle time permitted for this clocksource */
803 	cs->max_idle_ns = clocksource_max_deferment(cs);
804 
805 	mutex_lock(&clocksource_mutex);
806 	clocksource_enqueue(cs);
807 	clocksource_enqueue_watchdog(cs);
808 	clocksource_select();
809 	mutex_unlock(&clocksource_mutex);
810 	return 0;
811 }
812 EXPORT_SYMBOL(clocksource_register);
813 
814 static void __clocksource_change_rating(struct clocksource *cs, int rating)
815 {
816 	list_del(&cs->list);
817 	cs->rating = rating;
818 	clocksource_enqueue(cs);
819 }
820 
821 /**
822  * clocksource_change_rating - Change the rating of a registered clocksource
823  * @cs:		clocksource to be changed
824  * @rating:	new rating
825  */
826 void clocksource_change_rating(struct clocksource *cs, int rating)
827 {
828 	mutex_lock(&clocksource_mutex);
829 	__clocksource_change_rating(cs, rating);
830 	clocksource_select();
831 	mutex_unlock(&clocksource_mutex);
832 }
833 EXPORT_SYMBOL(clocksource_change_rating);
834 
835 /*
836  * Unbind clocksource @cs. Called with clocksource_mutex held
837  */
838 static int clocksource_unbind(struct clocksource *cs)
839 {
840 	/*
841 	 * I really can't convince myself to support this on hardware
842 	 * designed by lobotomized monkeys.
843 	 */
844 	if (clocksource_is_watchdog(cs))
845 		return -EBUSY;
846 
847 	if (cs == curr_clocksource) {
848 		/* Select and try to install a replacement clock source */
849 		clocksource_select_fallback();
850 		if (curr_clocksource == cs)
851 			return -EBUSY;
852 	}
853 	clocksource_dequeue_watchdog(cs);
854 	list_del_init(&cs->list);
855 	return 0;
856 }
857 
858 /**
859  * clocksource_unregister - remove a registered clocksource
860  * @cs:	clocksource to be unregistered
861  */
862 int clocksource_unregister(struct clocksource *cs)
863 {
864 	int ret = 0;
865 
866 	mutex_lock(&clocksource_mutex);
867 	if (!list_empty(&cs->list))
868 		ret = clocksource_unbind(cs);
869 	mutex_unlock(&clocksource_mutex);
870 	return ret;
871 }
872 EXPORT_SYMBOL(clocksource_unregister);
873 
874 #ifdef CONFIG_SYSFS
875 /**
876  * sysfs_show_current_clocksources - sysfs interface for current clocksource
877  * @dev:	unused
878  * @attr:	unused
879  * @buf:	char buffer to be filled with clocksource list
880  *
881  * Provides sysfs interface for listing current clocksource.
882  */
883 static ssize_t
884 sysfs_show_current_clocksources(struct device *dev,
885 				struct device_attribute *attr, char *buf)
886 {
887 	ssize_t count = 0;
888 
889 	mutex_lock(&clocksource_mutex);
890 	count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
891 	mutex_unlock(&clocksource_mutex);
892 
893 	return count;
894 }
895 
896 size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
897 {
898 	size_t ret = cnt;
899 
900 	/* strings from sysfs write are not 0 terminated! */
901 	if (!cnt || cnt >= CS_NAME_LEN)
902 		return -EINVAL;
903 
904 	/* strip of \n: */
905 	if (buf[cnt-1] == '\n')
906 		cnt--;
907 	if (cnt > 0)
908 		memcpy(dst, buf, cnt);
909 	dst[cnt] = 0;
910 	return ret;
911 }
912 
913 /**
914  * sysfs_override_clocksource - interface for manually overriding clocksource
915  * @dev:	unused
916  * @attr:	unused
917  * @buf:	name of override clocksource
918  * @count:	length of buffer
919  *
920  * Takes input from sysfs interface for manually overriding the default
921  * clocksource selection.
922  */
923 static ssize_t sysfs_override_clocksource(struct device *dev,
924 					  struct device_attribute *attr,
925 					  const char *buf, size_t count)
926 {
927 	size_t ret;
928 
929 	mutex_lock(&clocksource_mutex);
930 
931 	ret = sysfs_get_uname(buf, override_name, count);
932 	if (ret >= 0)
933 		clocksource_select();
934 
935 	mutex_unlock(&clocksource_mutex);
936 
937 	return ret;
938 }
939 
940 /**
941  * sysfs_unbind_current_clocksource - interface for manually unbinding clocksource
942  * @dev:	unused
943  * @attr:	unused
944  * @buf:	unused
945  * @count:	length of buffer
946  *
947  * Takes input from sysfs interface for manually unbinding a clocksource.
948  */
949 static ssize_t sysfs_unbind_clocksource(struct device *dev,
950 					struct device_attribute *attr,
951 					const char *buf, size_t count)
952 {
953 	struct clocksource *cs;
954 	char name[CS_NAME_LEN];
955 	size_t ret;
956 
957 	ret = sysfs_get_uname(buf, name, count);
958 	if (ret < 0)
959 		return ret;
960 
961 	ret = -ENODEV;
962 	mutex_lock(&clocksource_mutex);
963 	list_for_each_entry(cs, &clocksource_list, list) {
964 		if (strcmp(cs->name, name))
965 			continue;
966 		ret = clocksource_unbind(cs);
967 		break;
968 	}
969 	mutex_unlock(&clocksource_mutex);
970 
971 	return ret ? ret : count;
972 }
973 
974 /**
975  * sysfs_show_available_clocksources - sysfs interface for listing clocksource
976  * @dev:	unused
977  * @attr:	unused
978  * @buf:	char buffer to be filled with clocksource list
979  *
980  * Provides sysfs interface for listing registered clocksources
981  */
982 static ssize_t
983 sysfs_show_available_clocksources(struct device *dev,
984 				  struct device_attribute *attr,
985 				  char *buf)
986 {
987 	struct clocksource *src;
988 	ssize_t count = 0;
989 
990 	mutex_lock(&clocksource_mutex);
991 	list_for_each_entry(src, &clocksource_list, list) {
992 		/*
993 		 * Don't show non-HRES clocksource if the tick code is
994 		 * in one shot mode (highres=on or nohz=on)
995 		 */
996 		if (!tick_oneshot_mode_active() ||
997 		    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
998 			count += snprintf(buf + count,
999 				  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
1000 				  "%s ", src->name);
1001 	}
1002 	mutex_unlock(&clocksource_mutex);
1003 
1004 	count += snprintf(buf + count,
1005 			  max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");
1006 
1007 	return count;
1008 }
1009 
1010 /*
1011  * Sysfs setup bits:
1012  */
1013 static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
1014 		   sysfs_override_clocksource);
1015 
1016 static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource);
1017 
1018 static DEVICE_ATTR(available_clocksource, 0444,
1019 		   sysfs_show_available_clocksources, NULL);
1020 
1021 static struct bus_type clocksource_subsys = {
1022 	.name = "clocksource",
1023 	.dev_name = "clocksource",
1024 };
1025 
1026 static struct device device_clocksource = {
1027 	.id	= 0,
1028 	.bus	= &clocksource_subsys,
1029 };
1030 
1031 static int __init init_clocksource_sysfs(void)
1032 {
1033 	int error = subsys_system_register(&clocksource_subsys, NULL);
1034 
1035 	if (!error)
1036 		error = device_register(&device_clocksource);
1037 	if (!error)
1038 		error = device_create_file(
1039 				&device_clocksource,
1040 				&dev_attr_current_clocksource);
1041 	if (!error)
1042 		error = device_create_file(&device_clocksource,
1043 					   &dev_attr_unbind_clocksource);
1044 	if (!error)
1045 		error = device_create_file(
1046 				&device_clocksource,
1047 				&dev_attr_available_clocksource);
1048 	return error;
1049 }
1050 
1051 device_initcall(init_clocksource_sysfs);
1052 #endif /* CONFIG_SYSFS */
1053 
1054 /**
1055  * boot_override_clocksource - boot clock override
1056  * @str:	override name
1057  *
1058  * Takes a clocksource= boot argument and uses it
1059  * as the clocksource override name.
1060  */
1061 static int __init boot_override_clocksource(char* str)
1062 {
1063 	mutex_lock(&clocksource_mutex);
1064 	if (str)
1065 		strlcpy(override_name, str, sizeof(override_name));
1066 	mutex_unlock(&clocksource_mutex);
1067 	return 1;
1068 }
1069 
1070 __setup("clocksource=", boot_override_clocksource);
1071 
1072 /**
1073  * boot_override_clock - Compatibility layer for deprecated boot option
1074  * @str:	override name
1075  *
1076  * DEPRECATED! Takes a clock= boot argument and uses it
1077  * as the clocksource override name
1078  */
1079 static int __init boot_override_clock(char* str)
1080 {
1081 	if (!strcmp(str, "pmtmr")) {
1082 		printk("Warning: clock=pmtmr is deprecated. "
1083 			"Use clocksource=acpi_pm.\n");
1084 		return boot_override_clocksource("acpi_pm");
1085 	}
1086 	printk("Warning! clock= boot option is deprecated. "
1087 		"Use clocksource=xyz\n");
1088 	return boot_override_clocksource(str);
1089 }
1090 
1091 __setup("clock=", boot_override_clock);
1092