// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
 * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
 * Copyright (C) 2012-2014 Cisco Systems
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Copyright (C) 2019 Intel Corporation
 */

#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <asm/irq.h>
#include <asm/param.h>
#include <kern_util.h>
#include <os.h>
#include <linux/delay.h>
#include <linux/time-internal.h>
#include <linux/um_timetravel.h>
#include <shared/init.h>

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
#include <linux/sched/clock.h>

enum time_travel_mode time_travel_mode;
EXPORT_SYMBOL_GPL(time_travel_mode);
static bool time_travel_start_set;
static unsigned long long time_travel_start;
static unsigned long long time_travel_time;
static unsigned long long time_travel_shm_offset;
static LIST_HEAD(time_travel_events);
static LIST_HEAD(time_travel_irqs);
static unsigned long long time_travel_timer_interval;
static unsigned long long time_travel_next_event;
static struct time_travel_event time_travel_timer_event;
static int time_travel_ext_fd = -1;
static unsigned int time_travel_ext_waiting;
static bool time_travel_ext_prev_request_valid;
static unsigned long long time_travel_ext_prev_request;
static unsigned long long *time_travel_ext_free_until;
static unsigned long long _time_travel_ext_free_until;
static u16 time_travel_shm_id;
static struct um_timetravel_schedshm *time_travel_shm;
static union um_timetravel_schedshm_client *time_travel_shm_client;

unsigned long tt_extra_sched_jiffies;

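/*
 * Scheduler clock: jiffies since boot scaled to nanoseconds, plus
 * tt_extra_sched_jiffies, which can be bumped elsewhere to nudge the
 * scheduler clock forward; the tick handlers below decrement it again
 * on each delivered tick so sched_clock() stays continuous.
 */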
notrace unsigned long long sched_clock(void)
{
	return (unsigned long long)(jiffies - INITIAL_JIFFIES +
				    tt_extra_sched_jiffies)
					* (NSEC_PER_SEC / HZ);
}

static void time_travel_set_time(unsigned long long ns)
{
	if (unlikely(ns < time_travel_time))
		panic("time-travel: time goes backwards %lld -> %lld\n",
		      time_travel_time, ns);
	else if (unlikely(ns >= S64_MAX))
		panic("The system was going to sleep forever, aborting");

	time_travel_time = ns;
}

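/*
 * How to obtain the next message from the time-travel controller:
 * TTMH_IDLE and TTMH_POLL first block in os_poll() (IDLE only with
 * interrupts disabled), TTMH_READ reads a message directly, and
 * TTMH_READ_START_ACK additionally receives the FDs (shared memory,
 * log) attached to the ACK for the START message.
 */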
enum time_travel_message_handling {
	TTMH_IDLE,
	TTMH_POLL,
	TTMH_READ,
	TTMH_READ_START_ACK,
};

static u64 bc_message;
int time_travel_should_print_bc_msg;

void _time_travel_print_bc_msg(void)
{
	time_travel_should_print_bc_msg = 0;
	printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message);
}

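/*
 * Map the scheduler shared-memory area handed to us with the START
 * ACK. On success, current time and "free until" are shared through
 * this mapping rather than exchanged via GET/UPDATE messages; on any
 * failure we quietly keep using the message protocol.
 */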
static void time_travel_setup_shm(int fd, u16 id)
{
	u32 len;

	time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm));

	if (!time_travel_shm)
		goto out;

	len = time_travel_shm->len;

	if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION ||
	    len < struct_size(time_travel_shm, clients, id + 1)) {
		os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm));
		time_travel_shm = NULL;
		goto out;
	}

	time_travel_shm = os_mremap_rw_shared(time_travel_shm,
					      sizeof(*time_travel_shm),
					      len);
	if (!time_travel_shm)
		goto out;

	time_travel_shm_offset = time_travel_shm->current_time;
	time_travel_shm_client = &time_travel_shm->clients[id];
	time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE;
	time_travel_shm_id = id;
	/* always look at that free_until from now on */
	time_travel_ext_free_until = &time_travel_shm->free_until;
out:
	os_close_file(fd);
}

static void time_travel_handle_message(struct um_timetravel_msg *msg,
				       enum time_travel_message_handling mode)
{
	struct um_timetravel_msg resp = {
		.op = UM_TIMETRAVEL_ACK,
	};
	int ret;

	/*
	 * We can't unlock here, but interrupt signals with a timetravel_handler
	 * (see um_request_irq_tt) get to the timetravel_handler anyway.
	 */
	if (mode != TTMH_READ) {
		BUG_ON(mode == TTMH_IDLE && !irqs_disabled());

		while (os_poll(1, &time_travel_ext_fd) != 0) {
			/* nothing */
		}
	}

	if (unlikely(mode == TTMH_READ_START_ACK)) {
		int fd[UM_TIMETRAVEL_SHARED_MAX_FDS];

		ret = os_rcv_fd_msg(time_travel_ext_fd, fd,
				    ARRAY_SIZE(fd), msg, sizeof(*msg));
		if (ret == sizeof(*msg)) {
			time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD],
					      msg->time & UM_TIMETRAVEL_START_ACK_ID);
			/* we don't use the logging for now */
			os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]);
		}
	} else {
		ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
	}

	if (ret == 0)
		panic("time-travel external link is broken\n");
	if (ret != sizeof(*msg))
		panic("invalid time-travel message - %d bytes\n", ret);

	switch (msg->op) {
	default:
		WARN_ONCE(1, "time-travel: unexpected message %lld\n",
			  (unsigned long long)msg->op);
		break;
	case UM_TIMETRAVEL_ACK:
		return;
	case UM_TIMETRAVEL_RUN:
		time_travel_set_time(msg->time);
		if (time_travel_shm) {
			/* no request right now since we're running */
			time_travel_shm_client->flags &=
				~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
			/* no ack for shared memory RUN */
			return;
		}
		break;
	case UM_TIMETRAVEL_FREE_UNTIL:
		/* not supposed to get this with shm, but ignore it */
		if (time_travel_shm)
			break;
		time_travel_ext_free_until = &_time_travel_ext_free_until;
		_time_travel_ext_free_until = msg->time;
		break;
	case UM_TIMETRAVEL_BROADCAST:
		bc_message = msg->time;
		time_travel_should_print_bc_msg = 1;
		break;
	}

	resp.seq = msg->seq;
	os_write_file(time_travel_ext_fd, &resp, sizeof(resp));
}

static u64 time_travel_ext_req(u32 op, u64 time)
{
	static int seq;
	int mseq = ++seq;
	struct um_timetravel_msg msg = {
		.op = op,
		.time = time,
		.seq = mseq,
	};

	/*
	 * We need to block even the timetravel handlers of SIGIO here and
	 * only restore their use when we got the ACK - otherwise we may
	 * (will) get interrupted by that, try to queue the IRQ for future
	 * processing and thus send another request while we're still waiting
	 * for an ACK, but the peer doesn't know we got interrupted and will
	 * send the ACKs in the same order as the messages, but we'd need to
	 * see them in the opposite order ...
	 *
	 * This wouldn't matter *too* much, but some ACKs carry the
	 * current time (for UM_TIMETRAVEL_GET) and getting another
	 * ACK without a time would confuse us a lot!
	 *
	 * The sequence number assignment that happens here lets us
	 * debug such message handling issues more easily.
	 */
	block_signals_hard();
	os_write_file(time_travel_ext_fd, &msg, sizeof(msg));

	/* no ACK expected for WAIT in shared memory mode */
	if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm)
		goto done;

	while (msg.op != UM_TIMETRAVEL_ACK)
		time_travel_handle_message(&msg,
					   op == UM_TIMETRAVEL_START ?
						TTMH_READ_START_ACK :
						TTMH_READ);

	if (msg.seq != mseq)
		panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
		      msg.op, msg.seq, mseq, msg.time);

	if (op == UM_TIMETRAVEL_GET)
		time_travel_set_time(msg.time);
done:
	unblock_signals_hard();

	return msg.time;
}

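/*
 * Wait for @fd to become readable while keeping the simulation alive:
 * messages arriving from the scheduler on time_travel_ext_fd are
 * handled until @fd itself has data.
 */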
void __time_travel_wait_readable(int fd)
{
	int fds[2] = { fd, time_travel_ext_fd };
	int ret;

	if (time_travel_mode != TT_MODE_EXTERNAL)
		return;

	while ((ret = os_poll(2, fds))) {
		struct um_timetravel_msg msg;

		if (ret == 1)
			time_travel_handle_message(&msg, TTMH_READ);
	}
}
EXPORT_SYMBOL_GPL(__time_travel_wait_readable);

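/*
 * Tell the scheduler the earliest time at which we need to run again,
 * unless we already requested exactly that time or are allowed to keep
 * running until then anyway ("free until").
 */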
263 
time_travel_ext_update_request(unsigned long long time)264 static void time_travel_ext_update_request(unsigned long long time)
265 {
266 	if (time_travel_mode != TT_MODE_EXTERNAL)
267 		return;
268 
269 	/* asked for exactly this time previously */
270 	if (time_travel_ext_prev_request_valid &&
271 	    time == time_travel_ext_prev_request)
272 		return;
273 
274 	/*
275 	 * if we're running and are allowed to run past the request
276 	 * then we don't need to update it either
277 	 *
278 	 * Note for shm we ignore FREE_UNTIL messages and leave the pointer
279 	 * to shared memory, and for non-shm the offset is 0.
280 	 */
281 	if (!time_travel_ext_waiting && time_travel_ext_free_until &&
282 	    time < (*time_travel_ext_free_until - time_travel_shm_offset))
283 		return;
284 
285 	time_travel_ext_prev_request = time;
286 	time_travel_ext_prev_request_valid = true;
287 
288 	if (time_travel_shm) {
289 		union um_timetravel_schedshm_client *running;
290 
291 		running = &time_travel_shm->clients[time_travel_shm->running_id];
292 
293 		if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) {
294 			time_travel_shm_client->flags |=
295 				UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
296 			time += time_travel_shm_offset;
297 			time_travel_shm_client->req_time = time;
298 			if (time < time_travel_shm->free_until)
299 				time_travel_shm->free_until = time;
300 			return;
301 		}
302 	}
303 
304 	time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
305 }
306 
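/* make our current simulated time visible to the controller */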
void __time_travel_propagate_time(void)
{
	static unsigned long long last_propagated;

	if (time_travel_shm) {
		if (time_travel_shm->running_id != time_travel_shm_id)
			panic("time-travel: setting time while not running\n");
		time_travel_shm->current_time = time_travel_time +
						time_travel_shm_offset;
		return;
	}

	if (last_propagated == time_travel_time)
		return;

	time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time);
	last_propagated = time_travel_time;
}
EXPORT_SYMBOL_GPL(__time_travel_propagate_time);

/* returns true if we must do a wait to the simtime device */
static bool time_travel_ext_request(unsigned long long time)
{
	/*
	 * If we received an external sync point ("free until") then we
	 * don't have to request/wait for anything until then, unless
	 * we're already waiting.
	 *
	 * Note for shm we ignore FREE_UNTIL messages and leave the pointer
	 * to shared memory, and for non-shm the offset is 0.
	 */
	if (!time_travel_ext_waiting && time_travel_ext_free_until &&
	    time < (*time_travel_ext_free_until - time_travel_shm_offset))
		return false;

	time_travel_ext_update_request(time);
	return true;
}

static void time_travel_ext_wait(bool idle)
{
	struct um_timetravel_msg msg = {
		.op = UM_TIMETRAVEL_ACK,
	};

	time_travel_ext_prev_request_valid = false;
	if (!time_travel_shm)
		time_travel_ext_free_until = NULL;
	time_travel_ext_waiting++;

	time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);

	/*
	 * Here we are deep in the idle loop, so we have to break out of the
	 * kernel abstraction in a sense and implement this in terms of the
	 * UML system waiting on the VQ interrupt while sleeping; when we get
	 * the signal it'll call time_travel_ext_vq_notify_done(), completing
	 * the call.
	 */
	while (msg.op != UM_TIMETRAVEL_RUN)
		time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL);

	time_travel_ext_waiting--;

	/* we might request more stuff while polling - reset when we run */
	time_travel_ext_prev_request_valid = false;
}

static void time_travel_ext_get_time(void)
{
	if (time_travel_shm)
		time_travel_set_time(time_travel_shm->current_time -
				     time_travel_shm_offset);
	else
		time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
}

static void __time_travel_update_time(unsigned long long ns, bool idle)
{
	if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns))
		time_travel_ext_wait(idle);
	else
		time_travel_set_time(ns);
}

static struct time_travel_event *time_travel_first_event(void)
{
	return list_first_entry_or_null(&time_travel_events,
					struct time_travel_event,
					list);
}

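/*
 * Insert the event into the list sorted by time and propagate the new
 * earliest expiry, both to the external scheduler and to
 * time_travel_next_event for the real-alarm handler below.
 */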
static void __time_travel_add_event(struct time_travel_event *e,
				    unsigned long long time)
{
	struct time_travel_event *tmp;
	bool inserted = false;
	unsigned long flags;

	if (e->pending)
		return;

	e->pending = true;
	e->time = time;

	local_irq_save(flags);
	list_for_each_entry(tmp, &time_travel_events, list) {
		/*
		 * Add the new entry before one with higher time,
		 * or if they're equal and both on stack, because
		 * in that case we need to unwind the stack in the
		 * right order, and the later event (timer sleep
		 * or such) must be dequeued first.
		 */
		if ((tmp->time > e->time) ||
		    (tmp->time == e->time && tmp->onstack && e->onstack)) {
			list_add_tail(&e->list, &tmp->list);
			inserted = true;
			break;
		}
	}

	if (!inserted)
		list_add_tail(&e->list, &time_travel_events);

	tmp = time_travel_first_event();
	time_travel_ext_update_request(tmp->time);
	time_travel_next_event = tmp->time;
	local_irq_restore(flags);
}

static void time_travel_add_event(struct time_travel_event *e,
				  unsigned long long time)
{
	if (WARN_ON(!e->fn))
		return;

	__time_travel_add_event(e, time);
}

void time_travel_add_event_rel(struct time_travel_event *e,
			       unsigned long long delay_ns)
{
	time_travel_add_event(e, time_travel_time + delay_ns);
}

static void time_travel_periodic_timer(struct time_travel_event *e)
{
	time_travel_add_event(&time_travel_timer_event,
			      time_travel_time + time_travel_timer_interval);

	/* clock tick; decrease extra jiffies by keeping sched_clock constant */
	if (tt_extra_sched_jiffies > 0)
		tt_extra_sched_jiffies -= 1;

	deliver_alarm();
}

void deliver_time_travel_irqs(void)
{
	struct time_travel_event *e;
	unsigned long flags;

	/*
	 * Don't do anything for most cases. Note that because here we have
	 * to disable IRQs (and re-enable later) we'll actually recurse at
	 * the end of the function, so this is strictly necessary.
	 */
	if (likely(list_empty(&time_travel_irqs)))
		return;

	local_irq_save(flags);
	irq_enter();
	while ((e = list_first_entry_or_null(&time_travel_irqs,
					     struct time_travel_event,
					     list))) {
		list_del(&e->list);
		e->pending = false;
		e->fn(e);
	}
	irq_exit();
	local_irq_restore(flags);
}

static void time_travel_deliver_event(struct time_travel_event *e)
{
	if (e == &time_travel_timer_event) {
		/*
		 * deliver_alarm() does the irq_enter/irq_exit
		 * by itself, so must handle it specially here
		 */
		e->fn(e);
	} else if (irqs_disabled()) {
		list_add_tail(&e->list, &time_travel_irqs);
		/*
		 * set pending again, it was set to false when the
		 * event was deleted from the original list, but
		 * now it's still pending until we deliver the IRQ.
		 */
		e->pending = true;
	} else {
		unsigned long flags;

		local_irq_save(flags);
		irq_enter();
		e->fn(e);
		irq_exit();
		local_irq_restore(flags);
	}
}

bool time_travel_del_event(struct time_travel_event *e)
{
	unsigned long flags;

	if (!e->pending)
		return false;
	local_irq_save(flags);
	list_del(&e->list);
	e->pending = false;
	local_irq_restore(flags);
	return true;
}

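/*
 * Advance simulated time to @next: queue an on-stack marker event for
 * that time and deliver everything that expires before it. New events
 * may be added while we wait, so loop until the marker itself is
 * dequeued - or, if @idle, until the first event was delivered and we
 * know what woke us up.
 */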
static void time_travel_update_time(unsigned long long next, bool idle)
{
	struct time_travel_event ne = {
		.onstack = true,
	};
	struct time_travel_event *e;
	bool finished = idle;

	/* add it without a handler - we deal with that specifically below */
	__time_travel_add_event(&ne, next);

	do {
		e = time_travel_first_event();

		BUG_ON(!e);
		__time_travel_update_time(e->time, idle);

		/* new events may have been inserted while we were waiting */
		if (e == time_travel_first_event()) {
			BUG_ON(!time_travel_del_event(e));
			BUG_ON(time_travel_time != e->time);

			if (e == &ne) {
				finished = true;
			} else {
				if (e->onstack)
					panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n",
					      time_travel_time, e->time, e);
				time_travel_deliver_event(e);
			}
		}

		e = time_travel_first_event();
		if (e)
			time_travel_ext_update_request(e->time);
	} while (ne.pending && !finished);

	time_travel_del_event(&ne);
}

static void time_travel_update_time_rel(unsigned long long offs)
{
	unsigned long flags;

	/*
	 * Disable interrupts before calculating the new time so
	 * that a real timer interrupt (signal) can't happen at
	 * a bad time e.g. after we read time_travel_time but
	 * before we've completed updating the time.
	 */
	local_irq_save(flags);
	time_travel_update_time(time_travel_time + offs, false);
	local_irq_restore(flags);
}

void time_travel_ndelay(unsigned long nsec)
{
	/*
	 * Not strictly needed to use _rel() version since this is
	 * only used in INFCPU/EXT modes, but it doesn't hurt and
	 * is more readable too.
	 */
	time_travel_update_time_rel(nsec);
}
EXPORT_SYMBOL(time_travel_ndelay);

void time_travel_add_irq_event(struct time_travel_event *e)
{
	BUG_ON(time_travel_mode != TT_MODE_EXTERNAL);

	time_travel_ext_get_time();
	/*
	 * We could model interrupt latency here, for now just
	 * don't have any latency at all and request the exact
	 * same time (again) to run the interrupt...
	 */
	time_travel_add_event(e, time_travel_time);
}
EXPORT_SYMBOL_GPL(time_travel_add_irq_event);

static void time_travel_oneshot_timer(struct time_travel_event *e)
{
	/* clock tick; decrease extra jiffies by keeping sched_clock constant */
	if (tt_extra_sched_jiffies > 0)
		tt_extra_sched_jiffies -= 1;

	deliver_alarm();
}

void time_travel_sleep(void)
{
	/*
	 * Wait "forever" (using S64_MAX, because there are some potential
	 * wrapping issues, especially with the current TT_MODE_EXTERNAL
	 * controller application).
	 */
	unsigned long long next = S64_MAX;
	int cpu = raw_smp_processor_id();

	if (time_travel_mode == TT_MODE_BASIC)
		os_timer_disable(cpu);

	time_travel_update_time(next, true);

	if (time_travel_mode == TT_MODE_BASIC &&
	    time_travel_timer_event.pending) {
		if (time_travel_timer_event.fn == time_travel_periodic_timer) {
			/*
			 * This is somewhat wrong - we should get the first
			 * one sooner like the os_timer_one_shot() below...
			 */
			os_timer_set_interval(cpu, time_travel_timer_interval);
		} else {
			os_timer_one_shot(cpu, time_travel_timer_event.time - next);
		}
	}
}

static void time_travel_handle_real_alarm(void)
{
	time_travel_set_time(time_travel_next_event);

	time_travel_del_event(&time_travel_timer_event);

	if (time_travel_timer_event.fn == time_travel_periodic_timer)
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time +
				      time_travel_timer_interval);
}

static void time_travel_set_interval(unsigned long long interval)
{
	time_travel_timer_interval = interval;
}

static int time_travel_connect_external(const char *socket)
{
	const char *sep;
	unsigned long long id = (unsigned long long)-1;
	int rc;

	if ((sep = strchr(socket, ':'))) {
		char buf[25] = {};
		if (sep - socket > sizeof(buf) - 1)
			goto invalid_number;

		memcpy(buf, socket, sep - socket);
		if (kstrtoull(buf, 0, &id)) {
invalid_number:
			panic("time-travel: invalid external ID in string '%s'\n",
			      socket);
			return -EINVAL;
		}

		socket = sep + 1;
	}

	rc = os_connect_socket(socket);
	if (rc < 0) {
		panic("time-travel: failed to connect to external socket %s\n",
		      socket);
		return rc;
	}

	time_travel_ext_fd = rc;

	time_travel_ext_req(UM_TIMETRAVEL_START, id);

	return 1;
}

static void time_travel_set_start(void)
{
	if (time_travel_start_set)
		return;

	switch (time_travel_mode) {
	case TT_MODE_EXTERNAL:
		time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
		/* controller gave us the *current* time, so adjust by that */
		time_travel_ext_get_time();
		time_travel_start -= time_travel_time;
		break;
	case TT_MODE_INFCPU:
	case TT_MODE_BASIC:
		if (!time_travel_start_set)
			time_travel_start = os_persistent_clock_emulation();
		break;
	case TT_MODE_OFF:
		/* we just read the host clock with os_persistent_clock_emulation() */
		break;
	}

	time_travel_start_set = true;
}
#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
#define time_travel_start_set 0
#define time_travel_start 0
#define time_travel_time 0
#define time_travel_ext_waiting 0

static inline void time_travel_update_time(unsigned long long ns, bool idle)
{
}

static inline void time_travel_update_time_rel(unsigned long long offs)
{
}

static inline void time_travel_handle_real_alarm(void)
{
}

static void time_travel_set_interval(unsigned long long interval)
{
}

static inline void time_travel_set_start(void)
{
}

/* fail link if this actually gets used */
extern u64 time_travel_ext_req(u32 op, u64 time);

/* these are empty macros so the struct/fn need not exist */
#define time_travel_add_event(e, time) do { } while (0)
/* externally not usable - redefine here so we can */
#undef time_travel_del_event
#define time_travel_del_event(e) do { } while (0)
#endif

static struct clock_event_device timer_clockevent[NR_CPUS];

void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
	unsigned long flags;

	/*
	 * In basic time-travel mode we still get real interrupts
	 * (signals) but since we don't read time from the OS, we
	 * must update the simulated time here to the expiry when
	 * we get a signal.
	 * This is not the case in inf-cpu mode, since there we
	 * never get any real signals from the OS.
	 */
	if (time_travel_mode == TT_MODE_BASIC)
		time_travel_handle_real_alarm();

	local_irq_save(flags);
	do_IRQ(TIMER_IRQ, regs);
	local_irq_restore(flags);
}

static int itimer_shutdown(struct clock_event_device *evt)
{
	int cpu = evt - &timer_clockevent[0];

	if (time_travel_mode != TT_MODE_OFF)
		time_travel_del_event(&time_travel_timer_event);

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		os_timer_disable(cpu);

	return 0;
}

static int itimer_set_periodic(struct clock_event_device *evt)
{
	unsigned long long interval = NSEC_PER_SEC / HZ;
	int cpu = evt - &timer_clockevent[0];

	if (time_travel_mode != TT_MODE_OFF) {
		time_travel_del_event(&time_travel_timer_event);
		time_travel_set_event_fn(&time_travel_timer_event,
					 time_travel_periodic_timer);
		time_travel_set_interval(interval);
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time + interval);
	}

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		os_timer_set_interval(cpu, interval);

	return 0;
}

static int itimer_next_event(unsigned long delta,
			     struct clock_event_device *evt)
{
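	/* round the expiry up by one tick (1ns here, since mult == 1) */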
	delta += 1;

	if (time_travel_mode != TT_MODE_OFF) {
		time_travel_del_event(&time_travel_timer_event);
		time_travel_set_event_fn(&time_travel_timer_event,
					 time_travel_oneshot_timer);
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time + delta);
	}

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		return os_timer_one_shot(raw_smp_processor_id(), delta);

	return 0;
}

static int itimer_one_shot(struct clock_event_device *evt)
{
	return itimer_next_event(0, evt);
}

static struct clock_event_device _timer_clockevent = {
	.name			= "posix-timer",
	.rating			= 250,
	.features		= CLOCK_EVT_FEAT_PERIODIC |
				  CLOCK_EVT_FEAT_ONESHOT,
	.set_state_shutdown	= itimer_shutdown,
	.set_state_periodic	= itimer_set_periodic,
	.set_state_oneshot	= itimer_one_shot,
	.set_next_event		= itimer_next_event,
	.shift			= 0,
	.max_delta_ns		= 0xffffffff,
	.max_delta_ticks	= 0xffffffff,
	.min_delta_ns		= TIMER_MIN_DELTA,
	.min_delta_ticks	= TIMER_MIN_DELTA,
	.irq			= 0,
	.mult			= 1,
};

static irqreturn_t um_timer(int irq, void *dev)
{
	int cpu = raw_smp_processor_id();
	struct clock_event_device *evt = &timer_clockevent[cpu];

	/*
	 * Interrupt the (possibly) running userspace process, technically this
	 * should only happen if userspace is currently executing.
	 * With infinite CPU time-travel, we can only get here when userspace
	 * is not executing. Do not notify there and avoid spurious scheduling.
	 */
	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL &&
	    get_current()->mm)
		os_alarm_process(get_current()->mm->context.id.pid);

	evt->event_handler(evt);

	return IRQ_HANDLED;
}

static u64 timer_read(struct clocksource *cs)
{
	if (time_travel_mode != TT_MODE_OFF) {
		/*
		 * We make reading the timer cost a bit so that we don't get
		 * stuck in loops that expect time to move more than the
		 * exact requested sleep amount, e.g. python's socket server,
		 * see https://bugs.python.org/issue37026.
		 *
		 * However, don't do that when we're in interrupt or such as
		 * then we might recurse into our own processing, and get to
		 * even more waiting, and that's not good - it messes up the
		 * "what do I do next" and onstack event we use to know when
		 * to return from time_travel_update_time().
		 */
		if (!irqs_disabled() && !in_interrupt() && !in_softirq() &&
		    !time_travel_ext_waiting)
			time_travel_update_time_rel(TIMER_MULTIPLIER);
		return time_travel_time / TIMER_MULTIPLIER;
	}

	return os_nsecs() / TIMER_MULTIPLIER;
}

static struct clocksource timer_clocksource = {
	.name		= "timer",
	.rating		= 300,
	.read		= timer_read,
	.mask		= CLOCKSOURCE_MASK(64),
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
};

int um_setup_timer(void)
{
	int cpu = raw_smp_processor_id();
	struct clock_event_device *evt = &timer_clockevent[cpu];
	int err;

	err = os_timer_create();
	if (err)
		return err;

	memcpy(evt, &_timer_clockevent, sizeof(*evt));
	evt->cpumask = cpumask_of(cpu);
	clockevents_register_device(evt);

	return 0;
}

static void __init um_timer_init(void)
{
	int err;

	err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL);
	if (err != 0)
		printk(KERN_ERR "register_timer: request_irq failed - errno = %d\n",
		       -err);

	err = um_setup_timer();
	if (err) {
		printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
		return;
	}

	err = clocksource_register_hz(&timer_clocksource, NSEC_PER_SEC/TIMER_MULTIPLIER);
	if (err) {
		printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
		return;
	}
}

void read_persistent_clock64(struct timespec64 *ts)
{
	long long nsecs;

	time_travel_set_start();

	if (time_travel_mode != TT_MODE_OFF)
		nsecs = time_travel_start + time_travel_time;
	else
		nsecs = os_persistent_clock_emulation();

	set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
				  nsecs % NSEC_PER_SEC);
}

void __init time_init(void)
{
	timer_set_signal_handler();
	late_time_init = um_timer_init;
}

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
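/*
 * With simulated CPU time a calibration busy-loop would never observe
 * time advancing, so report a dummy pre-computed lpj value to skip it.
 */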
unsigned long calibrate_delay_is_known(void)
{
	if (time_travel_mode == TT_MODE_INFCPU ||
	    time_travel_mode == TT_MODE_EXTERNAL)
		return 1;
	return 0;
}

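/*
 * Parse "time-travel", "time-travel=inf-cpu" or
 * "time-travel=ext:[ID:]/path/to/socket"; see the help text below.
 */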
static int setup_time_travel(char *str)
{
	if (strcmp(str, "=inf-cpu") == 0) {
		time_travel_mode = TT_MODE_INFCPU;
		_timer_clockevent.name = "time-travel-timer-infcpu";
		timer_clocksource.name = "time-travel-clock";
		return 1;
	}

	if (strncmp(str, "=ext:", 5) == 0) {
		time_travel_mode = TT_MODE_EXTERNAL;
		_timer_clockevent.name = "time-travel-timer-external";
		timer_clocksource.name = "time-travel-clock-external";
		return time_travel_connect_external(str + 5);
	}

	if (!*str) {
		time_travel_mode = TT_MODE_BASIC;
		_timer_clockevent.name = "time-travel-timer";
		timer_clocksource.name = "time-travel-clock";
		return 1;
	}

	return -EINVAL;
}

__setup("time-travel", setup_time_travel);
__uml_help(setup_time_travel,
"time-travel\n"
"    This option just enables basic time travel mode, in which the clock/timers\n"
"    inside the UML instance skip forward when there's nothing to do, rather than\n"
"    waiting for real time to elapse. However, instance CPU speed is limited by\n"
"    the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
"    clock (but quicker when there's nothing to do).\n"
"\n"
"time-travel=inf-cpu\n"
"    This enables time travel mode with infinite processing power, in which there\n"
"    are no wall clock timers, and any CPU processing happens - as seen from the\n"
"    guest - instantly. This can be useful for accurate simulation regardless of\n"
"    debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
"    easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
"\n"
"time-travel=ext:[ID:]/path/to/socket\n"
"    This enables time travel mode similar to =inf-cpu, except the system will\n"
"    use the given socket to coordinate with a central scheduler, in order to\n"
"    have more than one system simultaneously be on simulated time. The virtio\n"
"    driver code in UML knows about this so you can also simulate networks and\n"
"    devices using it, assuming the device has the right capabilities.\n"
"    The optional ID is a 64-bit integer that's sent to the central scheduler.\n\n");

static int setup_time_travel_start(char *str)
{
	int err;

	err = kstrtoull(str, 0, &time_travel_start);
	if (err)
		return err;

	time_travel_start_set = 1;
	return 1;
}

__setup("time-travel-start=", setup_time_travel_start);
__uml_help(setup_time_travel_start,
"time-travel-start=<nanoseconds>\n"
"    Configure the UML instance's wall clock to start at this value rather than\n"
"    the host's wall clock at the time of UML boot.\n\n");

static struct kobject *bc_time_kobject;

static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "0x%llx", bc_message);
}

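/* writing a value forwards it to the controller as a broadcast message */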
static ssize_t bc_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count)
{
	int ret;
	u64 user_bc_message;

	ret = kstrtou64(buf, 0, &user_bc_message);
	if (ret)
		return ret;

	bc_message = user_bc_message;

	time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, bc_message);
	pr_info("um: time: sent broadcast message: 0x%llx\n", bc_message);
	return count;
}

static struct kobj_attribute bc_attribute = __ATTR(bc-message, 0660, bc_show, bc_store);

static int __init um_bc_start(void)
{
	if (time_travel_mode != TT_MODE_EXTERNAL)
		return 0;

	bc_time_kobject = kobject_create_and_add("um-ext-time", kernel_kobj);
	if (!bc_time_kobject)
		return 0;

	if (sysfs_create_file(bc_time_kobject, &bc_attribute.attr))
		pr_debug("failed to create the bc file in /sys/kernel/um-ext-time\n");

	return 0;
}
late_initcall(um_bc_start);
#endif