// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
 * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
 * Copyright (C) 2012-2014 Cisco Systems
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Copyright (C) 2019 Intel Corporation
 */

#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <asm/irq.h>
#include <asm/param.h>
#include <kern_util.h>
#include <os.h>
#include <linux/delay.h>
#include <linux/time-internal.h>
#include <linux/um_timetravel.h>
#include <shared/init.h>

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
#include <linux/sched/clock.h>

enum time_travel_mode time_travel_mode;
EXPORT_SYMBOL_GPL(time_travel_mode);

static bool time_travel_start_set;
static unsigned long long time_travel_start;
static unsigned long long time_travel_time;
static unsigned long long time_travel_shm_offset;
static LIST_HEAD(time_travel_events);
static LIST_HEAD(time_travel_irqs);
static unsigned long long time_travel_timer_interval;
static unsigned long long time_travel_next_event;
static struct time_travel_event time_travel_timer_event;
static int time_travel_ext_fd = -1;
static unsigned int time_travel_ext_waiting;
static bool time_travel_ext_prev_request_valid;
static unsigned long long time_travel_ext_prev_request;
static unsigned long long *time_travel_ext_free_until;
static unsigned long long _time_travel_ext_free_until;
static u16 time_travel_shm_id;
static struct um_timetravel_schedshm *time_travel_shm;
static union um_timetravel_schedshm_client *time_travel_shm_client;

unsigned long tt_extra_sched_jiffies;

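/*
 * sched_clock() here is only jiffies-based (plus any extra ticks
 * accounted in tt_extra_sched_jiffies), so its resolution is a
 * whole tick (NSEC_PER_SEC / HZ).
 */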
notrace unsigned long long sched_clock(void)
{
	return (unsigned long long)(jiffies - INITIAL_JIFFIES +
				    tt_extra_sched_jiffies)
					* (NSEC_PER_SEC / HZ);
}

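/*
 * Set the simulated clock; time must never go backwards, and S64_MAX
 * is treated as "sleeping forever" (see time_travel_sleep()).
 */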
static void time_travel_set_time(unsigned long long ns)
{
	if (unlikely(ns < time_travel_time))
		panic("time-travel: time goes backwards %lld -> %lld\n",
		      time_travel_time, ns);
	else if (unlikely(ns >= S64_MAX))
		panic("The system was going to sleep forever, aborting");

	time_travel_time = ns;
}

enum time_travel_message_handling {
	TTMH_IDLE,
	TTMH_POLL,
	TTMH_READ,
	TTMH_READ_START_ACK,
};

static u64 bc_message;
int time_travel_should_print_bc_msg;

void _time_travel_print_bc_msg(void)
{
	time_travel_should_print_bc_msg = 0;
	printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message);
}

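/*
 * Map the scheduler's shared memory, if the controller offered it with
 * the START ACK: validate version and size, grab our client slot and
 * advertise the time-share capability. On any failure time_travel_shm
 * stays NULL and we fall back to the plain message protocol.
 */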
static void time_travel_setup_shm(int fd, u16 id)
{
	u32 len;

	time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm));

	if (!time_travel_shm)
		goto out;

	len = time_travel_shm->len;

	if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION ||
	    len < struct_size(time_travel_shm, clients, id + 1)) {
		os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm));
		time_travel_shm = NULL;
		goto out;
	}

	time_travel_shm = os_mremap_rw_shared(time_travel_shm,
					      sizeof(*time_travel_shm),
					      len);
	if (!time_travel_shm)
		goto out;

	time_travel_shm_offset = time_travel_shm->current_time;
	time_travel_shm_client = &time_travel_shm->clients[id];
	time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE;
	time_travel_shm_id = id;
	/* always look at that free_until from now on */
	time_travel_ext_free_until = &time_travel_shm->free_until;
out:
	os_close_file(fd);
}

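/*
 * Receive one message from the controller and act on it. Unless called
 * in plain read mode we first wait for the fd to become readable; the
 * START ACK is special in that it may also carry the shared memory and
 * logging file descriptors.
 */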
static void time_travel_handle_message(struct um_timetravel_msg *msg,
				       enum time_travel_message_handling mode)
{
	struct um_timetravel_msg resp = {
		.op = UM_TIMETRAVEL_ACK,
	};
	int ret;

	/*
	 * We can't unlock here, but interrupt signals with a timetravel_handler
	 * (see um_request_irq_tt) get to the timetravel_handler anyway.
	 */
	if (mode != TTMH_READ) {
		BUG_ON(mode == TTMH_IDLE && !irqs_disabled());

		while (os_poll(1, &time_travel_ext_fd) != 0) {
			/* nothing */
		}
	}

	if (unlikely(mode == TTMH_READ_START_ACK)) {
		int fd[UM_TIMETRAVEL_SHARED_MAX_FDS];

		ret = os_rcv_fd_msg(time_travel_ext_fd, fd,
				    ARRAY_SIZE(fd), msg, sizeof(*msg));
		if (ret == sizeof(*msg)) {
			time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD],
					      msg->time & UM_TIMETRAVEL_START_ACK_ID);
			/* we don't use the logging for now */
			os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]);
		}
	} else {
		ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
	}

	if (ret == 0)
		panic("time-travel external link is broken\n");
	if (ret != sizeof(*msg))
		panic("invalid time-travel message - %d bytes\n", ret);

	switch (msg->op) {
	default:
		WARN_ONCE(1, "time-travel: unexpected message %lld\n",
			  (unsigned long long)msg->op);
		break;
	case UM_TIMETRAVEL_ACK:
		return;
	case UM_TIMETRAVEL_RUN:
		time_travel_set_time(msg->time);
		if (time_travel_shm) {
			/* no request right now since we're running */
			time_travel_shm_client->flags &=
				~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
			/* no ack for shared memory RUN */
			return;
		}
		break;
	case UM_TIMETRAVEL_FREE_UNTIL:
		/* not supposed to get this with shm, but ignore it */
		if (time_travel_shm)
			break;
		time_travel_ext_free_until = &_time_travel_ext_free_until;
		_time_travel_ext_free_until = msg->time;
		break;
	case UM_TIMETRAVEL_BROADCAST:
		bc_message = msg->time;
		time_travel_should_print_bc_msg = 1;
		break;
	}

	resp.seq = msg->seq;
	os_write_file(time_travel_ext_fd, &resp, sizeof(resp));
}

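/*
 * Send one request to the controller and wait synchronously for the
 * matching ACK, handling any other messages that arrive in between.
 */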
static u64 time_travel_ext_req(u32 op, u64 time)
{
	static int seq;
	int mseq = ++seq;
	struct um_timetravel_msg msg = {
		.op = op,
		.time = time,
		.seq = mseq,
	};

	/*
	 * We need to block even the timetravel handlers of SIGIO here and
	 * only restore their use when we got the ACK - otherwise we may
	 * (will) get interrupted by one, try to queue the IRQ for future
	 * processing and thus send another request while we're still waiting
	 * for an ACK. The peer doesn't know we got interrupted, so it sends
	 * the ACKs in the same order as the messages, but we'd need to see
	 * them in the opposite order ...
	 *
	 * This wouldn't matter *too* much, but some ACKs carry the
	 * current time (for UM_TIMETRAVEL_GET) and getting another
	 * ACK without a time would confuse us a lot!
	 *
	 * The sequence number assignment that happens here lets us
	 * debug such message handling issues more easily.
	 */
	block_signals_hard();
	os_write_file(time_travel_ext_fd, &msg, sizeof(msg));

	/* no ACK expected for WAIT in shared memory mode */
	if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm)
		goto done;

	while (msg.op != UM_TIMETRAVEL_ACK)
		time_travel_handle_message(&msg,
					   op == UM_TIMETRAVEL_START ?
						TTMH_READ_START_ACK :
						TTMH_READ);

	if (msg.seq != mseq)
		panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
		      msg.op, msg.seq, mseq, msg.time);

	if (op == UM_TIMETRAVEL_GET)
		time_travel_set_time(msg.time);
done:
	unblock_signals_hard();

	return msg.time;
}

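/*
 * Wait for the given fd to become readable while servicing any
 * time-travel messages that arrive on the external socket meanwhile.
 */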
void __time_travel_wait_readable(int fd)
{
	int fds[2] = { fd, time_travel_ext_fd };
	int ret;

	if (time_travel_mode != TT_MODE_EXTERNAL)
		return;

	while ((ret = os_poll(2, fds))) {
		struct um_timetravel_msg msg;

		if (ret == 1)
			time_travel_handle_message(&msg, TTMH_READ);
	}
}
EXPORT_SYMBOL_GPL(__time_travel_wait_readable);

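/*
 * Tell the controller (or, with shared memory, the scheduler directly)
 * the next time we need to run, unless we already requested exactly
 * this time or are still within the "free until" window.
 */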
static void time_travel_ext_update_request(unsigned long long time)
{
	if (time_travel_mode != TT_MODE_EXTERNAL)
		return;

	/* asked for exactly this time previously */
	if (time_travel_ext_prev_request_valid &&
	    time == time_travel_ext_prev_request)
		return;

	/*
	 * if we're running and are allowed to run past the request
	 * then we don't need to update it either
	 *
	 * Note for shm we ignore FREE_UNTIL messages and leave the pointer
	 * to shared memory, and for non-shm the offset is 0.
	 */
	if (!time_travel_ext_waiting && time_travel_ext_free_until &&
	    time < (*time_travel_ext_free_until - time_travel_shm_offset))
		return;

	time_travel_ext_prev_request = time;
	time_travel_ext_prev_request_valid = true;

	if (time_travel_shm) {
		union um_timetravel_schedshm_client *running;

		running = &time_travel_shm->clients[time_travel_shm->running_id];

		if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) {
			time_travel_shm_client->flags |=
				UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
			time += time_travel_shm_offset;
			time_travel_shm_client->req_time = time;
			if (time < time_travel_shm->free_until)
				time_travel_shm->free_until = time;
			return;
		}
	}

	time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
}

void __time_travel_propagate_time(void)
{
	static unsigned long long last_propagated;

	if (time_travel_shm) {
		if (time_travel_shm->running_id != time_travel_shm_id)
			panic("time-travel: setting time while not running\n");
		time_travel_shm->current_time = time_travel_time +
						time_travel_shm_offset;
		return;
	}

	if (last_propagated == time_travel_time)
		return;

	time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time);
	last_propagated = time_travel_time;
}
EXPORT_SYMBOL_GPL(__time_travel_propagate_time);

/* returns true if we must do a wait to the simtime device */
static bool time_travel_ext_request(unsigned long long time)
{
	/*
	 * If we received an external sync point ("free until") then we
	 * don't have to request/wait for anything until then, unless
	 * we're already waiting.
	 *
	 * Note for shm we ignore FREE_UNTIL messages and leave the pointer
	 * to shared memory, and for non-shm the offset is 0.
	 */
	if (!time_travel_ext_waiting && time_travel_ext_free_until &&
	    time < (*time_travel_ext_free_until - time_travel_shm_offset))
		return false;

	time_travel_ext_update_request(time);
	return true;
}

static void time_travel_ext_wait(bool idle)
{
	struct um_timetravel_msg msg = {
		.op = UM_TIMETRAVEL_ACK,
	};

	time_travel_ext_prev_request_valid = false;
	if (!time_travel_shm)
		time_travel_ext_free_until = NULL;
	time_travel_ext_waiting++;

	time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);

	/*
	 * Here we are deep in the idle loop, so we have to break out of the
	 * kernel abstraction in a sense and implement this in terms of the
	 * UML system waiting on the VQ interrupt while sleeping; when we get
	 * the signal it'll call time_travel_ext_vq_notify_done(), completing
	 * the call.
	 */
	while (msg.op != UM_TIMETRAVEL_RUN)
		time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL);

	time_travel_ext_waiting--;

	/* we might request more stuff while polling - reset when we run */
	time_travel_ext_prev_request_valid = false;
}

static void time_travel_ext_get_time(void)
{
	if (time_travel_shm)
		time_travel_set_time(time_travel_shm->current_time -
				     time_travel_shm_offset);
	else
		time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
}

static void __time_travel_update_time(unsigned long long ns, bool idle)
{
	if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns))
		time_travel_ext_wait(idle);
	else
		time_travel_set_time(ns);
}

static struct time_travel_event *time_travel_first_event(void)
{
	return list_first_entry_or_null(&time_travel_events,
					struct time_travel_event,
					list);
}

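/*
 * Insert the event into the time-ordered event list and propagate the
 * (possibly new) earliest event time to the external controller. For
 * equal times, two on-stack events are kept in LIFO order so nested
 * waits unwind correctly.
 */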
static void __time_travel_add_event(struct time_travel_event *e,
				    unsigned long long time)
{
	struct time_travel_event *tmp;
	bool inserted = false;
	unsigned long flags;

	if (e->pending)
		return;

	e->pending = true;
	e->time = time;

	local_irq_save(flags);
	list_for_each_entry(tmp, &time_travel_events, list) {
		/*
		 * Add the new entry before one with higher time,
		 * or if they're equal and both on stack, because
		 * in that case we need to unwind the stack in the
		 * right order, and the later event (timer sleep
		 * or such) must be dequeued first.
		 */
		if ((tmp->time > e->time) ||
		    (tmp->time == e->time && tmp->onstack && e->onstack)) {
			list_add_tail(&e->list, &tmp->list);
			inserted = true;
			break;
		}
	}

	if (!inserted)
		list_add_tail(&e->list, &time_travel_events);

	tmp = time_travel_first_event();
	time_travel_ext_update_request(tmp->time);
	time_travel_next_event = tmp->time;
	local_irq_restore(flags);
}

static void time_travel_add_event(struct time_travel_event *e,
				  unsigned long long time)
{
	if (WARN_ON(!e->fn))
		return;

	__time_travel_add_event(e, time);
}

void time_travel_add_event_rel(struct time_travel_event *e,
			       unsigned long long delay_ns)
{
	time_travel_add_event(e, time_travel_time + delay_ns);
}

static void time_travel_periodic_timer(struct time_travel_event *e)
{
	time_travel_add_event(&time_travel_timer_event,
			      time_travel_time + time_travel_timer_interval);

	/* clock tick; decrease extra jiffies by keeping sched_clock constant */
	if (tt_extra_sched_jiffies > 0)
		tt_extra_sched_jiffies -= 1;

	deliver_alarm();
}

void deliver_time_travel_irqs(void)
{
	struct time_travel_event *e;
	unsigned long flags;

	/*
	 * Don't do anything for most cases. Note that because here we have
	 * to disable IRQs (and re-enable later) we'll actually recurse at
	 * the end of the function, so the early return for an empty list
	 * is strictly necessary.
	 */
	if (likely(list_empty(&time_travel_irqs)))
		return;

	local_irq_save(flags);
	irq_enter();
	while ((e = list_first_entry_or_null(&time_travel_irqs,
					     struct time_travel_event,
					     list))) {
		list_del(&e->list);
		e->pending = false;
		e->fn(e);
	}
	irq_exit();
	local_irq_restore(flags);
}

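/*
 * Run an event's handler in IRQ context; if interrupts are currently
 * disabled the event is queued on time_travel_irqs instead, to be
 * delivered later by deliver_time_travel_irqs().
 */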
static void time_travel_deliver_event(struct time_travel_event *e)
{
	if (e == &time_travel_timer_event) {
		/*
		 * deliver_alarm() does the irq_enter/irq_exit
		 * by itself, so must handle it specially here
		 */
		e->fn(e);
	} else if (irqs_disabled()) {
		list_add_tail(&e->list, &time_travel_irqs);
		/*
		 * set pending again, it was set to false when the
		 * event was deleted from the original list, but
		 * now it's still pending until we deliver the IRQ.
		 */
		e->pending = true;
	} else {
		unsigned long flags;

		local_irq_save(flags);
		irq_enter();
		e->fn(e);
		irq_exit();
		local_irq_restore(flags);
	}
}

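/* Remove an event from the list; returns true if it was still pending. */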
bool time_travel_del_event(struct time_travel_event *e)
{
	unsigned long flags;

	if (!e->pending)
		return false;
	local_irq_save(flags);
	list_del(&e->list);
	e->pending = false;
	local_irq_restore(flags);
	return true;
}

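/*
 * Advance simulated time towards 'next', delivering all events that
 * become due on the way. The on-stack sentinel event marks the target
 * time; in the idle case only a single step is processed per call.
 */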
static void time_travel_update_time(unsigned long long next, bool idle)
{
	struct time_travel_event ne = {
		.onstack = true,
	};
	struct time_travel_event *e;
	bool finished = idle;

	/* add it without a handler - we deal with that specifically below */
	__time_travel_add_event(&ne, next);

	do {
		e = time_travel_first_event();

		BUG_ON(!e);
		__time_travel_update_time(e->time, idle);

		/* new events may have been inserted while we were waiting */
		if (e == time_travel_first_event()) {
			BUG_ON(!time_travel_del_event(e));
			BUG_ON(time_travel_time != e->time);

			if (e == &ne) {
				finished = true;
			} else {
				if (e->onstack)
					panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n",
					      time_travel_time, e->time, e);
				time_travel_deliver_event(e);
			}
		}

		e = time_travel_first_event();
		if (e)
			time_travel_ext_update_request(e->time);
	} while (ne.pending && !finished);

	time_travel_del_event(&ne);
}

static void time_travel_update_time_rel(unsigned long long offs)
{
	unsigned long flags;

	/*
	 * Disable interrupts before calculating the new time so
	 * that a real timer interrupt (signal) can't happen at
	 * a bad time e.g. after we read time_travel_time but
	 * before we've completed updating the time.
	 */
	local_irq_save(flags);
	time_travel_update_time(time_travel_time + offs, false);
	local_irq_restore(flags);
}

void time_travel_ndelay(unsigned long nsec)
{
	/*
	 * Not strictly needed to use _rel() version since this is
	 * only used in INFCPU/EXT modes, but it doesn't hurt and
	 * is more readable too.
	 */
	time_travel_update_time_rel(nsec);
}
EXPORT_SYMBOL(time_travel_ndelay);

void time_travel_add_irq_event(struct time_travel_event *e)
{
	BUG_ON(time_travel_mode != TT_MODE_EXTERNAL);

	time_travel_ext_get_time();
	/*
	 * We could model interrupt latency here, for now just
	 * don't have any latency at all and request the exact
	 * same time (again) to run the interrupt...
	 */
	time_travel_add_event(e, time_travel_time);
}
EXPORT_SYMBOL_GPL(time_travel_add_irq_event);

static void time_travel_oneshot_timer(struct time_travel_event *e)
{
	/* clock tick; decrease extra jiffies by keeping sched_clock constant */
	if (tt_extra_sched_jiffies > 0)
		tt_extra_sched_jiffies -= 1;

	deliver_alarm();
}

void time_travel_sleep(void)
{
	/*
	 * Wait "forever" (using S64_MAX because there are some potential
	 * wrapping issues, especially with the current TT_MODE_EXTERNAL
	 * controller application).
	 */
	unsigned long long next = S64_MAX;

	if (time_travel_mode == TT_MODE_BASIC)
		os_timer_disable();

	time_travel_update_time(next, true);

	if (time_travel_mode == TT_MODE_BASIC &&
	    time_travel_timer_event.pending) {
		if (time_travel_timer_event.fn == time_travel_periodic_timer) {
			/*
			 * This is somewhat wrong - we should get the first
			 * one sooner like the os_timer_one_shot() below...
			 */
			os_timer_set_interval(time_travel_timer_interval);
		} else {
			os_timer_one_shot(time_travel_timer_event.time - next);
		}
	}
}

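/*
 * A real timer signal arrived (basic mode): jump the simulated clock
 * to the programmed event time and re-arm the timer event if it is
 * the periodic one.
 */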
static void time_travel_handle_real_alarm(void)
{
	time_travel_set_time(time_travel_next_event);

	time_travel_del_event(&time_travel_timer_event);

	if (time_travel_timer_event.fn == time_travel_periodic_timer)
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time +
				      time_travel_timer_interval);
}

static void time_travel_set_interval(unsigned long long interval)
{
	time_travel_timer_interval = interval;
}

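/*
 * Parse the optional "ID:" prefix of the time-travel=ext: argument,
 * connect to the controller's socket and send the START request.
 */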
static int time_travel_connect_external(const char *socket)
{
	const char *sep;
	unsigned long long id = (unsigned long long)-1;
	int rc;

	if ((sep = strchr(socket, ':'))) {
		char buf[25] = {};

		if (sep - socket > sizeof(buf) - 1)
			goto invalid_number;

		memcpy(buf, socket, sep - socket);
		if (kstrtoull(buf, 0, &id)) {
invalid_number:
			panic("time-travel: invalid external ID in string '%s'\n",
			      socket);
			return -EINVAL;
		}

		socket = sep + 1;
	}

	rc = os_connect_socket(socket);
	if (rc < 0) {
		panic("time-travel: failed to connect to external socket %s\n",
		      socket);
		return rc;
	}

	time_travel_ext_fd = rc;

	time_travel_ext_req(UM_TIMETRAVEL_START, id);

	return 1;
}

702 {
703 	if (time_travel_start_set)
704 		return;
705 
706 	switch (time_travel_mode) {
707 	case TT_MODE_EXTERNAL:
708 		time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
709 		/* controller gave us the *current* time, so adjust by that */
710 		time_travel_ext_get_time();
711 		time_travel_start -= time_travel_time;
712 		break;
	case TT_MODE_INFCPU:
	case TT_MODE_BASIC:
		/* we returned early above if the start time was already set */
		time_travel_start = os_persistent_clock_emulation();
		break;
	case TT_MODE_OFF:
		/* we just read the host clock with os_persistent_clock_emulation() */
		break;
	}

	time_travel_start_set = true;
}
#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
#define time_travel_start_set 0
#define time_travel_start 0
#define time_travel_time 0
#define time_travel_ext_waiting 0

static inline void time_travel_update_time(unsigned long long ns, bool idle)
{
}

static inline void time_travel_update_time_rel(unsigned long long offs)
{
}

static inline void time_travel_handle_real_alarm(void)
{
}

static inline void time_travel_set_interval(unsigned long long interval)
{
}

static inline void time_travel_set_start(void)
{
}

/* fail link if this actually gets used */
extern u64 time_travel_ext_req(u32 op, u64 time);

/* these are empty macros so the struct/fn need not exist */
#define time_travel_add_event(e, time) do { } while (0)
/* externally not usable - redefine here so we can */
#undef time_travel_del_event
#define time_travel_del_event(e) do { } while (0)
#endif

void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
	unsigned long flags;

	/*
	 * In basic time-travel mode we still get real interrupts
	 * (signals) but since we don't read time from the OS, we
	 * must update the simulated time here to the expiry when
	 * we get a signal.
	 * This is not the case in inf-cpu mode, since there we
	 * never get any real signals from the OS.
	 */
	if (time_travel_mode == TT_MODE_BASIC)
		time_travel_handle_real_alarm();

	local_irq_save(flags);
	do_IRQ(TIMER_IRQ, regs);
	local_irq_restore(flags);
}

static int itimer_shutdown(struct clock_event_device *evt)
{
	if (time_travel_mode != TT_MODE_OFF)
		time_travel_del_event(&time_travel_timer_event);

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		os_timer_disable();

	return 0;
}

static int itimer_set_periodic(struct clock_event_device *evt)
{
	unsigned long long interval = NSEC_PER_SEC / HZ;

	if (time_travel_mode != TT_MODE_OFF) {
		time_travel_del_event(&time_travel_timer_event);
		time_travel_set_event_fn(&time_travel_timer_event,
					 time_travel_periodic_timer);
		time_travel_set_interval(interval);
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time + interval);
	}

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		os_timer_set_interval(interval);

	return 0;
}

static int itimer_next_event(unsigned long delta,
			     struct clock_event_device *evt)
{
	delta += 1;

	if (time_travel_mode != TT_MODE_OFF) {
		time_travel_del_event(&time_travel_timer_event);
		time_travel_set_event_fn(&time_travel_timer_event,
					 time_travel_oneshot_timer);
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time + delta);
	}

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		return os_timer_one_shot(delta);

	return 0;
}

static int itimer_one_shot(struct clock_event_device *evt)
{
	return itimer_next_event(0, evt);
}

static struct clock_event_device timer_clockevent = {
	.name			= "posix-timer",
	.rating			= 250,
	.cpumask		= cpu_possible_mask,
	.features		= CLOCK_EVT_FEAT_PERIODIC |
				  CLOCK_EVT_FEAT_ONESHOT,
	.set_state_shutdown	= itimer_shutdown,
	.set_state_periodic	= itimer_set_periodic,
	.set_state_oneshot	= itimer_one_shot,
	.set_next_event		= itimer_next_event,
	.shift			= 0,
	.max_delta_ns		= 0xffffffff,
	.max_delta_ticks	= 0xffffffff,
	.min_delta_ns		= TIMER_MIN_DELTA,
	/* microsecond resolution should be enough for anyone, same as 640K RAM */
	.min_delta_ticks	= TIMER_MIN_DELTA,
	.irq			= 0,
	.mult			= 1,
};

static irqreturn_t um_timer(int irq, void *dev)
{
	if (get_current()->mm != NULL) {
		/* userspace - relay signal, results in correct userspace timers */
		os_alarm_process(get_current()->mm->context.id.pid);
	}

	(*timer_clockevent.event_handler)(&timer_clockevent);

	return IRQ_HANDLED;
}

static u64 timer_read(struct clocksource *cs)
{
	if (time_travel_mode != TT_MODE_OFF) {
		/*
		 * We make reading the timer cost a bit so that we don't get
		 * stuck in loops that expect time to move more than the
		 * exact requested sleep amount, e.g. python's socket server,
		 * see https://bugs.python.org/issue37026.
		 *
		 * However, don't do that when we're in interrupt or such as
		 * then we might recurse into our own processing, and get to
		 * even more waiting, and that's not good - it messes up the
		 * "what do I do next" and onstack event we use to know when
		 * to return from time_travel_update_time().
		 */
		if (!irqs_disabled() && !in_interrupt() && !in_softirq() &&
		    !time_travel_ext_waiting)
			time_travel_update_time_rel(TIMER_MULTIPLIER);
		return time_travel_time / TIMER_MULTIPLIER;
	}

	return os_nsecs() / TIMER_MULTIPLIER;
}

static struct clocksource timer_clocksource = {
	.name		= "timer",
	.rating		= 300,
	.read		= timer_read,
	.mask		= CLOCKSOURCE_MASK(64),
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
};

static void __init um_timer_setup(void)
{
	int err;

	err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL);
	if (err != 0)
		printk(KERN_ERR "register_timer: request_irq failed - errno = %d\n",
		       -err);

	err = os_timer_create();
	if (err != 0) {
		printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
		return;
	}

	err = clocksource_register_hz(&timer_clocksource, NSEC_PER_SEC/TIMER_MULTIPLIER);
	if (err) {
		printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
		return;
	}
	clockevents_register_device(&timer_clockevent);
}

void read_persistent_clock64(struct timespec64 *ts)
{
	long long nsecs;

	time_travel_set_start();

	if (time_travel_mode != TT_MODE_OFF)
		nsecs = time_travel_start + time_travel_time;
	else
		nsecs = os_persistent_clock_emulation();

	set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
				  nsecs % NSEC_PER_SEC);
}

void __init time_init(void)
{
	timer_set_signal_handler();
	late_time_init = um_timer_setup;
}

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
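/*
 * Skip the delay-loop calibration in the simulated-time modes: there's
 * no real CPU speed to measure there, and a calibration busy-loop
 * would not advance simulated time, so it could never finish.
 */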
unsigned long calibrate_delay_is_known(void)
{
	if (time_travel_mode == TT_MODE_INFCPU ||
	    time_travel_mode == TT_MODE_EXTERNAL)
		return 1;
	return 0;
}

static int setup_time_travel(char *str)
{
	if (strcmp(str, "=inf-cpu") == 0) {
		time_travel_mode = TT_MODE_INFCPU;
		timer_clockevent.name = "time-travel-timer-infcpu";
		timer_clocksource.name = "time-travel-clock";
		return 1;
	}

	if (strncmp(str, "=ext:", 5) == 0) {
		time_travel_mode = TT_MODE_EXTERNAL;
		timer_clockevent.name = "time-travel-timer-external";
		timer_clocksource.name = "time-travel-clock-external";
		return time_travel_connect_external(str + 5);
	}

	if (!*str) {
		time_travel_mode = TT_MODE_BASIC;
		timer_clockevent.name = "time-travel-timer";
		timer_clocksource.name = "time-travel-clock";
		return 1;
	}

	return -EINVAL;
}

__setup("time-travel", setup_time_travel);
__uml_help(setup_time_travel,
"time-travel\n"
"This option just enables basic time travel mode, in which the clock/timers\n"
"inside the UML instance skip forward when there's nothing to do, rather than\n"
"waiting for real time to elapse. However, instance CPU speed is limited by\n"
"the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
"clock (but quicker when there's nothing to do).\n"
"\n"
"time-travel=inf-cpu\n"
"This enables time travel mode with infinite processing power, in which there\n"
"are no wall clock timers, and any CPU processing happens - as seen from the\n"
"guest - instantly. This can be useful for accurate simulation regardless of\n"
"debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
"easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
"\n"
"time-travel=ext:[ID:]/path/to/socket\n"
"This enables time travel mode similar to =inf-cpu, except the system will\n"
"use the given socket to coordinate with a central scheduler, in order to\n"
"have more than one system simultaneously be on simulated time. The virtio\n"
"driver code in UML knows about this so you can also simulate networks and\n"
"devices using it, assuming the device has the right capabilities.\n"
"The optional ID is a 64-bit integer that's sent to the central scheduler.\n");

static int setup_time_travel_start(char *str)
{
	int err;

	err = kstrtoull(str, 0, &time_travel_start);
	if (err)
		return err;

	time_travel_start_set = true;
	return 1;
}

__setup("time-travel-start=", setup_time_travel_start);
__uml_help(setup_time_travel_start,
"time-travel-start=<nanoseconds>\n"
"Configure the UML instance's wall clock to start at this value rather than\n"
"the host's wall clock at the time of UML boot.\n");

static struct kobject *bc_time_kobject;

static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "0x%llx\n", bc_message);
}

static ssize_t bc_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count)
{
	int ret;
	u64 user_bc_message;

	ret = kstrtou64(buf, 0, &user_bc_message);
	if (ret)
		return ret;

	bc_message = user_bc_message;

	time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, bc_message);
	pr_info("um: time: sent broadcast message: 0x%llx\n", bc_message);
	return count;
}

static struct kobj_attribute bc_attribute = __ATTR(bc-message, 0660, bc_show, bc_store);

static int __init um_bc_start(void)
{
	if (time_travel_mode != TT_MODE_EXTERNAL)
		return 0;

	bc_time_kobject = kobject_create_and_add("um-ext-time", kernel_kobj);
	if (!bc_time_kobject)
		return 0;

	if (sysfs_create_file(bc_time_kobject, &bc_attribute.attr))
		pr_debug("failed to create the bc file in /sys/kernel/um-ext-time\n");

	return 0;
}
late_initcall(um_bc_start);
#endif