// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
 * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
 * Copyright (C) 2012-2014 Cisco Systems
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Copyright (C) 2019 Intel Corporation
 */

#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <asm/irq.h>
#include <asm/param.h>
#include <kern_util.h>
#include <os.h>
#include <linux/delay.h>
#include <linux/time-internal.h>
#include <linux/um_timetravel.h>
#include <shared/init.h>

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
#include <linux/sched/clock.h>

enum time_travel_mode time_travel_mode;
EXPORT_SYMBOL_GPL(time_travel_mode);

static bool time_travel_start_set;
static unsigned long long time_travel_start;
static unsigned long long time_travel_time;
static unsigned long long time_travel_shm_offset;
static LIST_HEAD(time_travel_events);
static LIST_HEAD(time_travel_irqs);
static unsigned long long time_travel_timer_interval;
static unsigned long long time_travel_next_event;
static struct time_travel_event time_travel_timer_event;
static int time_travel_ext_fd = -1;
static unsigned int time_travel_ext_waiting;
static bool time_travel_ext_prev_request_valid;
static unsigned long long time_travel_ext_prev_request;
static unsigned long long *time_travel_ext_free_until;
static unsigned long long _time_travel_ext_free_until;
static u16 time_travel_shm_id;
static struct um_timetravel_schedshm *time_travel_shm;
static union um_timetravel_schedshm_client *time_travel_shm_client;

unsigned long tt_extra_sched_jiffies;

notrace unsigned long long sched_clock(void)
{
	return (unsigned long long)(jiffies - INITIAL_JIFFIES +
				    tt_extra_sched_jiffies)
		* (NSEC_PER_SEC / HZ);
}
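
/*
 * For example, with HZ == 100 each jiffy accounts for
 * NSEC_PER_SEC / HZ == 10,000,000ns, so 42 elapsed jiffies read as 420ms
 * of scheduler time.  tt_extra_sched_jiffies holds extra ticks injected
 * into the scheduler while jiffies stood still; the timer event handlers
 * below decrement it again when a real tick arrives, so the sum - and
 * thus sched_clock() - stays consistent across that adjustment.
 */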

static void time_travel_set_time(unsigned long long ns)
{
	if (unlikely(ns < time_travel_time))
		panic("time-travel: time goes backwards %lld -> %lld\n",
		      time_travel_time, ns);
	else if (unlikely(ns >= S64_MAX))
		panic("The system was going to sleep forever, aborting");

	time_travel_time = ns;
}

enum time_travel_message_handling {
	TTMH_IDLE,
	TTMH_POLL,
	TTMH_READ,
	TTMH_READ_START_ACK,
};

static u64 bc_message;
int time_travel_should_print_bc_msg;

void _time_travel_print_bc_msg(void)
{
	time_travel_should_print_bc_msg = 0;
	printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message);
}

static void time_travel_setup_shm(int fd, u16 id)
{
	u32 len;

	time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm));

	if (!time_travel_shm)
		goto out;

	len = time_travel_shm->len;

	if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION ||
	    len < struct_size(time_travel_shm, clients, id + 1)) {
		os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm));
		time_travel_shm = NULL;
		goto out;
	}

	time_travel_shm = os_mremap_rw_shared(time_travel_shm,
					      sizeof(*time_travel_shm),
					      len);
	if (!time_travel_shm)
		goto out;

	time_travel_shm_offset = time_travel_shm->current_time;
	time_travel_shm_client = &time_travel_shm->clients[id];
	time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE;
	time_travel_shm_id = id;
	/* always look at that free_until from now on */
	time_travel_ext_free_until = &time_travel_shm->free_until;
out:
	os_close_file(fd);
}

static void time_travel_handle_message(struct um_timetravel_msg *msg,
				       enum time_travel_message_handling mode)
{
	struct um_timetravel_msg resp = {
		.op = UM_TIMETRAVEL_ACK,
	};
	int ret;

	/*
	 * We can't unlock here, but interrupt signals with a timetravel_handler
	 * (see um_request_irq_tt) get to the timetravel_handler anyway.
	 */
	if (mode != TTMH_READ) {
		BUG_ON(mode == TTMH_IDLE && !irqs_disabled());

		while (os_poll(1, &time_travel_ext_fd) != 0) {
			/* nothing */
		}
	}

	if (unlikely(mode == TTMH_READ_START_ACK)) {
		int fd[UM_TIMETRAVEL_SHARED_MAX_FDS];

		ret = os_rcv_fd_msg(time_travel_ext_fd, fd,
				    ARRAY_SIZE(fd), msg, sizeof(*msg));
		if (ret == sizeof(*msg)) {
			time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD],
					      msg->time & UM_TIMETRAVEL_START_ACK_ID);
			/* we don't use the logging for now */
			os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]);
		}
	} else {
		ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
	}

	if (ret == 0)
		panic("time-travel external link is broken\n");
	if (ret != sizeof(*msg))
		panic("invalid time-travel message - %d bytes\n", ret);

	switch (msg->op) {
	default:
		WARN_ONCE(1, "time-travel: unexpected message %lld\n",
			  (unsigned long long)msg->op);
		break;
	case UM_TIMETRAVEL_ACK:
		return;
	case UM_TIMETRAVEL_RUN:
		time_travel_set_time(msg->time);
		if (time_travel_shm) {
			/* no request right now since we're running */
			time_travel_shm_client->flags &=
				~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
			/* no ack for shared memory RUN */
			return;
		}
		break;
	case UM_TIMETRAVEL_FREE_UNTIL:
		/* not supposed to get this with shm, but ignore it */
		if (time_travel_shm)
			break;
		time_travel_ext_free_until = &_time_travel_ext_free_until;
		_time_travel_ext_free_until = msg->time;
		break;
	case UM_TIMETRAVEL_BROADCAST:
		bc_message = msg->time;
		time_travel_should_print_bc_msg = 1;
		break;
	}

	resp.seq = msg->seq;
	os_write_file(time_travel_ext_fd, &resp, sizeof(resp));
}

static u64 time_travel_ext_req(u32 op, u64 time)
{
	static int seq;
	int mseq = ++seq;
	struct um_timetravel_msg msg = {
		.op = op,
		.time = time,
		.seq = mseq,
	};

	/*
	 * We need to block even the timetravel handlers of SIGIO here and
	 * only restore their use when we get the ACK - otherwise we may
	 * (will) get interrupted by one, try to queue the IRQ for future
	 * processing and thus send another request while we're still waiting
	 * for an ACK. The peer doesn't know we got interrupted and will send
	 * the ACKs in the same order as the messages, but we'd need to see
	 * them in the opposite order ...
	 *
	 * This wouldn't matter *too* much, but some ACKs carry the
	 * current time (for UM_TIMETRAVEL_GET) and getting another
	 * ACK without a time would confuse us a lot!
	 *
	 * The sequence number assignment that happens here lets us
	 * debug such message handling issues more easily.
	 */
	block_signals_hard();
	os_write_file(time_travel_ext_fd, &msg, sizeof(msg));

	/* no ACK expected for WAIT in shared memory mode */
	if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm)
		goto done;

	while (msg.op != UM_TIMETRAVEL_ACK)
		time_travel_handle_message(&msg,
					   op == UM_TIMETRAVEL_START ?
						TTMH_READ_START_ACK :
						TTMH_READ);

	if (msg.seq != mseq)
		panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
		      msg.op, msg.seq, mseq, msg.time);

	if (op == UM_TIMETRAVEL_GET)
		time_travel_set_time(msg.time);
done:
	unblock_signals_hard();

	return msg.time;
}
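
/*
 * For example, querying the controller's current time is one round trip:
 *
 *	u64 now = time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
 *
 * This writes { .op = UM_TIMETRAVEL_GET, .seq = N } to the socket, then
 * loops in time_travel_handle_message() until the matching ACK arrives;
 * for UM_TIMETRAVEL_GET the ACK carries the time, which is also applied
 * via time_travel_set_time() above before being returned.
 */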

void __time_travel_wait_readable(int fd)
{
	int fds[2] = { fd, time_travel_ext_fd };
	int ret;

	if (time_travel_mode != TT_MODE_EXTERNAL)
		return;

	while ((ret = os_poll(2, fds))) {
		struct um_timetravel_msg msg;

		if (ret == 1)
			time_travel_handle_message(&msg, TTMH_READ);
	}
}
EXPORT_SYMBOL_GPL(__time_travel_wait_readable);

static void time_travel_ext_update_request(unsigned long long time)
{
	if (time_travel_mode != TT_MODE_EXTERNAL)
		return;

	/* asked for exactly this time previously */
	if (time_travel_ext_prev_request_valid &&
	    time == time_travel_ext_prev_request)
		return;

	/*
	 * if we're running and are allowed to run past the request
	 * then we don't need to update it either
	 *
	 * Note for shm we ignore FREE_UNTIL messages and leave the pointer
	 * to shared memory, and for non-shm the offset is 0.
	 */
	if (!time_travel_ext_waiting && time_travel_ext_free_until &&
	    time < (*time_travel_ext_free_until - time_travel_shm_offset))
		return;

	time_travel_ext_prev_request = time;
	time_travel_ext_prev_request_valid = true;

	if (time_travel_shm) {
		union um_timetravel_schedshm_client *running;

		running = &time_travel_shm->clients[time_travel_shm->running_id];

		if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) {
			time_travel_shm_client->flags |=
				UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
			time += time_travel_shm_offset;
			time_travel_shm_client->req_time = time;
			if (time < time_travel_shm->free_until)
				time_travel_shm->free_until = time;
			return;
		}
	}

	time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
}
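
/*
 * Worked example for the free_until check above: if the scheduler last
 * granted free_until = 5000 in shared-clock time and our shm offset is
 * 1000, we may run freely up to local time 4000.  A request for local
 * time 3000 therefore needs no message to the controller, while one for
 * 4500 must be sent (or, with shm, written to req_time/free_until
 * directly).
 */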

void __time_travel_propagate_time(void)
{
	static unsigned long long last_propagated;

	if (time_travel_shm) {
		if (time_travel_shm->running_id != time_travel_shm_id)
			panic("time-travel: setting time while not running\n");
		time_travel_shm->current_time = time_travel_time +
						time_travel_shm_offset;
		return;
	}

	if (last_propagated == time_travel_time)
		return;

	time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time);
	last_propagated = time_travel_time;
}
EXPORT_SYMBOL_GPL(__time_travel_propagate_time);

/* returns true if we must do a wait to the simtime device */
static bool time_travel_ext_request(unsigned long long time)
{
	/*
	 * If we received an external sync point ("free until") then we
	 * don't have to request/wait for anything until then, unless
	 * we're already waiting.
	 *
	 * Note for shm we ignore FREE_UNTIL messages and leave the pointer
	 * to shared memory, and for non-shm the offset is 0.
	 */
	if (!time_travel_ext_waiting && time_travel_ext_free_until &&
	    time < (*time_travel_ext_free_until - time_travel_shm_offset))
		return false;

	time_travel_ext_update_request(time);
	return true;
}

static void time_travel_ext_wait(bool idle)
{
	struct um_timetravel_msg msg = {
		.op = UM_TIMETRAVEL_ACK,
	};

	time_travel_ext_prev_request_valid = false;
	if (!time_travel_shm)
		time_travel_ext_free_until = NULL;
	time_travel_ext_waiting++;

	time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);

	/*
	 * Here we are deep in the idle loop, so we have to break out of the
	 * kernel abstraction in a sense and implement this in terms of the
	 * UML system waiting on the VQ interrupt while sleeping; when we
	 * get the signal it'll call time_travel_ext_vq_notify_done(),
	 * completing the call.
	 */
	while (msg.op != UM_TIMETRAVEL_RUN)
		time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL);

	time_travel_ext_waiting--;

	/* we might request more stuff while polling - reset when we run */
	time_travel_ext_prev_request_valid = false;
}

static void time_travel_ext_get_time(void)
{
	if (time_travel_shm)
		time_travel_set_time(time_travel_shm->current_time -
				     time_travel_shm_offset);
	else
		time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
}

static void __time_travel_update_time(unsigned long long ns, bool idle)
{
	if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns))
		time_travel_ext_wait(idle);
	else
		time_travel_set_time(ns);
}

static struct time_travel_event *time_travel_first_event(void)
{
	return list_first_entry_or_null(&time_travel_events,
					struct time_travel_event,
					list);
}

static void __time_travel_add_event(struct time_travel_event *e,
				    unsigned long long time)
{
	struct time_travel_event *tmp;
	bool inserted = false;
	unsigned long flags;

	if (e->pending)
		return;

	e->pending = true;
	e->time = time;

	local_irq_save(flags);
	list_for_each_entry(tmp, &time_travel_events, list) {
		/*
		 * Add the new entry before one with higher time,
		 * or if they're equal and both on stack, because
		 * in that case we need to unwind the stack in the
		 * right order, and the later event (timer sleep
		 * or such) must be dequeued first.
		 */
		if ((tmp->time > e->time) ||
		    (tmp->time == e->time && tmp->onstack && e->onstack)) {
			list_add_tail(&e->list, &tmp->list);
			inserted = true;
			break;
		}
	}

	if (!inserted)
		list_add_tail(&e->list, &time_travel_events);

	tmp = time_travel_first_event();
	time_travel_ext_update_request(tmp->time);
	time_travel_next_event = tmp->time;
	local_irq_restore(flags);
}

static void time_travel_add_event(struct time_travel_event *e,
				  unsigned long long time)
{
	if (WARN_ON(!e->fn))
		return;

	__time_travel_add_event(e, time);
}

void time_travel_add_event_rel(struct time_travel_event *e,
			       unsigned long long delay_ns)
{
	time_travel_add_event(e, time_travel_time + delay_ns);
}
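
/*
 * Hypothetical usage sketch: a simulated device that wants a callback
 * after 1ms of simulated time would do something like
 *
 *	static void my_done(struct time_travel_event *e)
 *	{
 *		...
 *	}
 *
 *	static struct time_travel_event my_event;
 *
 *	time_travel_set_event_fn(&my_event, my_done);
 *	time_travel_add_event_rel(&my_event, 1 * NSEC_PER_MSEC);
 *
 * (my_done/my_event are made-up names; time_travel_set_event_fn is the
 * same helper the itimer code below uses, see linux/time-internal.h.)
 */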

static void time_travel_periodic_timer(struct time_travel_event *e)
{
	time_travel_add_event(&time_travel_timer_event,
			      time_travel_time + time_travel_timer_interval);

	/* clock tick; decrease extra jiffies by keeping sched_clock constant */
	if (tt_extra_sched_jiffies > 0)
		tt_extra_sched_jiffies -= 1;

	deliver_alarm();
}

void deliver_time_travel_irqs(void)
{
	struct time_travel_event *e;
	unsigned long flags;

	/*
	 * Don't do anything for most cases. Note that because here we have
	 * to disable IRQs (and re-enable later) we'll actually recurse at
	 * the end of the function, so the early exit here is strictly
	 * necessary.
	 */
	if (likely(list_empty(&time_travel_irqs)))
		return;

	local_irq_save(flags);
	irq_enter();
	while ((e = list_first_entry_or_null(&time_travel_irqs,
					     struct time_travel_event,
					     list))) {
		list_del(&e->list);
		e->pending = false;
		e->fn(e);
	}
	irq_exit();
	local_irq_restore(flags);
}

static void time_travel_deliver_event(struct time_travel_event *e)
{
	if (e == &time_travel_timer_event) {
		/*
		 * deliver_alarm() does the irq_enter/irq_exit
		 * by itself, so must handle it specially here
		 */
		e->fn(e);
	} else if (irqs_disabled()) {
		list_add_tail(&e->list, &time_travel_irqs);
		/*
		 * set pending again, it was set to false when the
		 * event was deleted from the original list, but
		 * now it's still pending until we deliver the IRQ.
		 */
		e->pending = true;
	} else {
		unsigned long flags;

		local_irq_save(flags);
		irq_enter();
		e->fn(e);
		irq_exit();
		local_irq_restore(flags);
	}
}

bool time_travel_del_event(struct time_travel_event *e)
{
	unsigned long flags;

	if (!e->pending)
		return false;
	local_irq_save(flags);
	list_del(&e->list);
	e->pending = false;
	local_irq_restore(flags);
	return true;
}

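/*
 * Advance time to @next, delivering every event that becomes due on the
 * way.  The on-stack sentinel event @ne marks the caller's own wake-up
 * point: once it reaches the head of the queue no earlier event remains,
 * so the loop can stop without running past @next.
 */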
static void time_travel_update_time(unsigned long long next, bool idle)
{
	struct time_travel_event ne = {
		.onstack = true,
	};
	struct time_travel_event *e;
	bool finished = idle;

	/* add it without a handler - we deal with that specifically below */
	__time_travel_add_event(&ne, next);

	do {
		e = time_travel_first_event();

		BUG_ON(!e);
		__time_travel_update_time(e->time, idle);

		/* new events may have been inserted while we were waiting */
		if (e == time_travel_first_event()) {
			BUG_ON(!time_travel_del_event(e));
			BUG_ON(time_travel_time != e->time);

			if (e == &ne) {
				finished = true;
			} else {
				if (e->onstack)
					panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n",
					      time_travel_time, e->time, e);
				time_travel_deliver_event(e);
			}
		}

		e = time_travel_first_event();
		if (e)
			time_travel_ext_update_request(e->time);
	} while (ne.pending && !finished);

	time_travel_del_event(&ne);
}

static void time_travel_update_time_rel(unsigned long long offs)
{
	unsigned long flags;

	/*
	 * Disable interrupts before calculating the new time so
	 * that a real timer interrupt (signal) can't happen at
	 * a bad time e.g. after we read time_travel_time but
	 * before we've completed updating the time.
	 */
	local_irq_save(flags);
	time_travel_update_time(time_travel_time + offs, false);
	local_irq_restore(flags);
}

void time_travel_ndelay(unsigned long nsec)
{
	/*
	 * Not strictly needed to use _rel() version since this is
	 * only used in INFCPU/EXT modes, but it doesn't hurt and
	 * is more readable too.
	 */
	time_travel_update_time_rel(nsec);
}
EXPORT_SYMBOL(time_travel_ndelay);
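
/*
 * For example, a driver modelling a device register that takes 500ns to
 * settle can call time_travel_ndelay(500); in the INFCPU/EXT modes this
 * simply advances simulated time (delivering anything that became due)
 * instead of busy-waiting on the host CPU.
 */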

void time_travel_add_irq_event(struct time_travel_event *e)
{
	BUG_ON(time_travel_mode != TT_MODE_EXTERNAL);

	time_travel_ext_get_time();
	/*
	 * We could model interrupt latency here, for now just
	 * don't have any latency at all and request the exact
	 * same time (again) to run the interrupt...
	 */
	time_travel_add_event(e, time_travel_time);
}
EXPORT_SYMBOL_GPL(time_travel_add_irq_event);

static void time_travel_oneshot_timer(struct time_travel_event *e)
{
	/* clock tick; decrease extra jiffies by keeping sched_clock constant */
	if (tt_extra_sched_jiffies > 0)
		tt_extra_sched_jiffies -= 1;

	deliver_alarm();
}

void time_travel_sleep(void)
{
	/*
	 * Wait "forever" (using S64_MAX because there are some potential
	 * wrapping issues, especially with the current TT_MODE_EXTERNAL
	 * controller application).
	 */
	unsigned long long next = S64_MAX;

	if (time_travel_mode == TT_MODE_BASIC)
		os_timer_disable();

	time_travel_update_time(next, true);

	if (time_travel_mode == TT_MODE_BASIC &&
	    time_travel_timer_event.pending) {
		if (time_travel_timer_event.fn == time_travel_periodic_timer) {
			/*
			 * This is somewhat wrong - we should get the first
			 * one sooner like the os_timer_one_shot() below...
			 */
			os_timer_set_interval(time_travel_timer_interval);
		} else {
			os_timer_one_shot(time_travel_timer_event.time - next);
		}
	}
}

static void time_travel_handle_real_alarm(void)
{
	time_travel_set_time(time_travel_next_event);

	time_travel_del_event(&time_travel_timer_event);

	if (time_travel_timer_event.fn == time_travel_periodic_timer)
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time +
				      time_travel_timer_interval);
}

static void time_travel_set_interval(unsigned long long interval)
{
	time_travel_timer_interval = interval;
}

static int time_travel_connect_external(const char *socket)
{
	const char *sep;
	unsigned long long id = (unsigned long long)-1;
	int rc;

	if ((sep = strchr(socket, ':'))) {
		char buf[25] = {};

		if (sep - socket > sizeof(buf) - 1)
			goto invalid_number;

		memcpy(buf, socket, sep - socket);
		if (kstrtoull(buf, 0, &id)) {
invalid_number:
			panic("time-travel: invalid external ID in string '%s'\n",
			      socket);
			return -EINVAL;
		}

		socket = sep + 1;
	}

	rc = os_connect_socket(socket);
	if (rc < 0) {
		panic("time-travel: failed to connect to external socket %s\n",
		      socket);
		return rc;
	}

	time_travel_ext_fd = rc;

	time_travel_ext_req(UM_TIMETRAVEL_START, id);

	return 1;
}

static void time_travel_set_start(void)
{
	if (time_travel_start_set)
		return;

	switch (time_travel_mode) {
	case TT_MODE_EXTERNAL:
		time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
		/* controller gave us the *current* time, so adjust by that */
		time_travel_ext_get_time();
		time_travel_start -= time_travel_time;
		break;
	case TT_MODE_INFCPU:
	case TT_MODE_BASIC:
		if (!time_travel_start_set)
			time_travel_start = os_persistent_clock_emulation();
		break;
	case TT_MODE_OFF:
		/* we just read the host clock with os_persistent_clock_emulation() */
		break;
	}

	time_travel_start_set = true;
}
#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
#define time_travel_start_set 0
#define time_travel_start 0
#define time_travel_time 0
#define time_travel_ext_waiting 0

static inline void time_travel_update_time(unsigned long long ns, bool idle)
{
}

static inline void time_travel_update_time_rel(unsigned long long offs)
{
}

static inline void time_travel_handle_real_alarm(void)
{
}

static inline void time_travel_set_interval(unsigned long long interval)
{
}

static inline void time_travel_set_start(void)
{
}

/* fail link if this actually gets used */
extern u64 time_travel_ext_req(u32 op, u64 time);

/* these are empty macros so the struct/fn need not exist */
#define time_travel_add_event(e, time) do { } while (0)
/* externally not usable - redefine here so we can */
#undef time_travel_del_event
#define time_travel_del_event(e) do { } while (0)
#endif

void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
	unsigned long flags;

	/*
	 * In basic time-travel mode we still get real interrupts
	 * (signals) but since we don't read time from the OS, we
	 * must update the simulated time here to the expiry when
	 * we get a signal.
	 * This is not the case in inf-cpu mode, since there we
	 * never get any real signals from the OS.
	 */
	if (time_travel_mode == TT_MODE_BASIC)
		time_travel_handle_real_alarm();

	local_irq_save(flags);
	do_IRQ(TIMER_IRQ, regs);
	local_irq_restore(flags);
}

static int itimer_shutdown(struct clock_event_device *evt)
{
	if (time_travel_mode != TT_MODE_OFF)
		time_travel_del_event(&time_travel_timer_event);

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		os_timer_disable();

	return 0;
}

static int itimer_set_periodic(struct clock_event_device *evt)
{
	unsigned long long interval = NSEC_PER_SEC / HZ;

	if (time_travel_mode != TT_MODE_OFF) {
		time_travel_del_event(&time_travel_timer_event);
		time_travel_set_event_fn(&time_travel_timer_event,
					 time_travel_periodic_timer);
		time_travel_set_interval(interval);
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time + interval);
	}

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		os_timer_set_interval(interval);

	return 0;
}

static int itimer_next_event(unsigned long delta,
			     struct clock_event_device *evt)
{
	delta += 1;

	if (time_travel_mode != TT_MODE_OFF) {
		time_travel_del_event(&time_travel_timer_event);
		time_travel_set_event_fn(&time_travel_timer_event,
					 time_travel_oneshot_timer);
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time + delta);
	}

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		return os_timer_one_shot(delta);

	return 0;
}

static int itimer_one_shot(struct clock_event_device *evt)
{
	return itimer_next_event(0, evt);
}

static struct clock_event_device timer_clockevent = {
	.name			= "posix-timer",
	.rating			= 250,
	.cpumask		= cpu_possible_mask,
	.features		= CLOCK_EVT_FEAT_PERIODIC |
				  CLOCK_EVT_FEAT_ONESHOT,
	.set_state_shutdown	= itimer_shutdown,
	.set_state_periodic	= itimer_set_periodic,
	.set_state_oneshot	= itimer_one_shot,
	.set_next_event		= itimer_next_event,
	.shift			= 0,
	.max_delta_ns		= 0xffffffff,
	.max_delta_ticks	= 0xffffffff,
	.min_delta_ns		= TIMER_MIN_DELTA,
	.min_delta_ticks	= TIMER_MIN_DELTA, // microsecond resolution should be enough for anyone, same as 640K RAM
	.irq			= 0,
	.mult			= 1,
};
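
/*
 * With .mult == 1 and .shift == 0 the clockevents core maps nanoseconds
 * to "ticks" one to one, so for example the 0xffffffff max delta above
 * caps a single programmed timer at roughly 4.29 seconds.
 */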

static irqreturn_t um_timer(int irq, void *dev)
{
	/*
	 * Interrupt the (possibly) running userspace process, technically this
	 * should only happen if userspace is currently executing.
	 * With infinite CPU time-travel, we can only get here when userspace
	 * is not executing. Do not notify there and avoid spurious scheduling.
	 */
	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL &&
	    get_current()->mm)
		os_alarm_process(get_current()->mm->context.id.pid);

	(*timer_clockevent.event_handler)(&timer_clockevent);

	return IRQ_HANDLED;
}

static u64 timer_read(struct clocksource *cs)
{
	if (time_travel_mode != TT_MODE_OFF) {
		/*
		 * We make reading the timer cost a bit so that we don't get
		 * stuck in loops that expect time to move more than the
		 * exact requested sleep amount, e.g. python's socket server,
		 * see https://bugs.python.org/issue37026.
		 *
		 * However, don't do that when we're in interrupt or such as
		 * then we might recurse into our own processing, and get to
		 * even more waiting, and that's not good - it messes up the
		 * "what do I do next" and onstack event we use to know when
		 * to return from time_travel_update_time().
		 */
		if (!irqs_disabled() && !in_interrupt() && !in_softirq() &&
		    !time_travel_ext_waiting)
			time_travel_update_time_rel(TIMER_MULTIPLIER);
		return time_travel_time / TIMER_MULTIPLIER;
	}

	return os_nsecs() / TIMER_MULTIPLIER;
}
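
/*
 * The net effect is that every clocksource read from normal context
 * costs TIMER_MULTIPLIER nanoseconds of simulated time, so a tight loop
 * that just re-reads the clock still observes time advancing (which is
 * what the python example above depends on).
 */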

static struct clocksource timer_clocksource = {
	.name		= "timer",
	.rating		= 300,
	.read		= timer_read,
	.mask		= CLOCKSOURCE_MASK(64),
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
};

static void __init um_timer_setup(void)
{
	int err;

	err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL);
	if (err != 0)
		printk(KERN_ERR "register_timer : request_irq failed - "
		       "errno = %d\n", -err);

	err = os_timer_create();
	if (err != 0) {
		printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
		return;
	}

	err = clocksource_register_hz(&timer_clocksource, NSEC_PER_SEC/TIMER_MULTIPLIER);
	if (err) {
		printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
		return;
	}
	clockevents_register_device(&timer_clockevent);
}

void read_persistent_clock64(struct timespec64 *ts)
{
	long long nsecs;

	time_travel_set_start();

	if (time_travel_mode != TT_MODE_OFF)
		nsecs = time_travel_start + time_travel_time;
	else
		nsecs = os_persistent_clock_emulation();

	set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
				  nsecs % NSEC_PER_SEC);
}

void __init time_init(void)
{
	timer_set_signal_handler();
	late_time_init = um_timer_setup;
}

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
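/*
 * Returning a non-zero value here tells the boot code that the delay
 * loop is already calibrated, which skips the BogoMIPS measurement;
 * under inf-cpu/external time-travel that measurement would be
 * meaningless since no time elapses while the guest computes.
 */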
unsigned long calibrate_delay_is_known(void)
{
	if (time_travel_mode == TT_MODE_INFCPU ||
	    time_travel_mode == TT_MODE_EXTERNAL)
		return 1;
	return 0;
}

static int setup_time_travel(char *str)
{
	if (strcmp(str, "=inf-cpu") == 0) {
		time_travel_mode = TT_MODE_INFCPU;
		timer_clockevent.name = "time-travel-timer-infcpu";
		timer_clocksource.name = "time-travel-clock";
		return 1;
	}

	if (strncmp(str, "=ext:", 5) == 0) {
		time_travel_mode = TT_MODE_EXTERNAL;
		timer_clockevent.name = "time-travel-timer-external";
		timer_clocksource.name = "time-travel-clock-external";
		return time_travel_connect_external(str + 5);
	}

	if (!*str) {
		time_travel_mode = TT_MODE_BASIC;
		timer_clockevent.name = "time-travel-timer";
		timer_clocksource.name = "time-travel-clock";
		return 1;
	}

	return -EINVAL;
}

__setup("time-travel", setup_time_travel);
__uml_help(setup_time_travel,
"time-travel\n"
" This option just enables basic time travel mode, in which the clock/timers\n"
" inside the UML instance skip forward when there's nothing to do, rather than\n"
" waiting for real time to elapse. However, instance CPU speed is limited by\n"
" the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
" clock (but quicker when there's nothing to do).\n"
"\n"
"time-travel=inf-cpu\n"
" This enables time travel mode with infinite processing power, in which there\n"
" are no wall clock timers, and any CPU processing happens - as seen from the\n"
" guest - instantly. This can be useful for accurate simulation regardless of\n"
" debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
" easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
"\n"
"time-travel=ext:[ID:]/path/to/socket\n"
" This enables time travel mode similar to =inf-cpu, except the system will\n"
" use the given socket to coordinate with a central scheduler, in order to\n"
" have more than one system simultaneously be on simulated time. The virtio\n"
" driver code in UML knows about this so you can also simulate networks and\n"
" devices using it, assuming the device has the right capabilities.\n"
" The optional ID is a 64-bit integer that's sent to the central scheduler.\n\n");

static int setup_time_travel_start(char *str)
{
	int err;

	err = kstrtoull(str, 0, &time_travel_start);
	if (err)
		return err;

	time_travel_start_set = 1;
	return 1;
}

__setup("time-travel-start=", setup_time_travel_start);
__uml_help(setup_time_travel_start,
"time-travel-start=<nanoseconds>\n"
" Configure the UML instance's wall clock to start at this value rather than\n"
" the host's wall clock at the time of UML boot.\n\n");

static struct kobject *bc_time_kobject;

static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "0x%llx", bc_message);
}

static ssize_t bc_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count)
{
	int ret;
	u64 user_bc_message;

	ret = kstrtou64(buf, 0, &user_bc_message);
	if (ret)
		return ret;

	bc_message = user_bc_message;

	time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, bc_message);
	pr_info("um: time: sent broadcast message: 0x%llx\n", bc_message);
	return count;
}

static struct kobj_attribute bc_attribute = __ATTR(bc-message, 0660, bc_show, bc_store);
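
/*
 * From user space (assuming the sysfs file was created successfully),
 * a broadcast can then be sent to all participants with e.g.
 *
 *	echo 0xdeadbeef > /sys/kernel/um-ext-time/bc-message
 *
 * and the last sent/received value read back from the same file.
 */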

static int __init um_bc_start(void)
{
	if (time_travel_mode != TT_MODE_EXTERNAL)
		return 0;

	bc_time_kobject = kobject_create_and_add("um-ext-time", kernel_kobj);
	if (!bc_time_kobject)
		return 0;

	if (sysfs_create_file(bc_time_kobject, &bc_attribute.attr))
		pr_debug("failed to create the bc file in /sys/kernel/um-ext-time\n");

	return 0;
}
late_initcall(um_bc_start);
#endif