// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
 * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
 * Copyright (C) 2012-2014 Cisco Systems
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Copyright (C) 2019 Intel Corporation
 */

#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <asm/irq.h>
#include <asm/param.h>
#include <kern_util.h>
#include <os.h>
#include <linux/delay.h>
#include <linux/time-internal.h>
#include <linux/um_timetravel.h>
#include <shared/init.h>
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
#include <linux/sched/clock.h>

enum time_travel_mode time_travel_mode;
EXPORT_SYMBOL_GPL(time_travel_mode);

static bool time_travel_start_set;
static unsigned long long time_travel_start;
static unsigned long long time_travel_time;
static unsigned long long time_travel_shm_offset;
static LIST_HEAD(time_travel_events);
static LIST_HEAD(time_travel_irqs);
static unsigned long long time_travel_timer_interval;
static unsigned long long time_travel_next_event;
static struct time_travel_event time_travel_timer_event;
static int time_travel_ext_fd = -1;
static unsigned int time_travel_ext_waiting;
static bool time_travel_ext_prev_request_valid;
static unsigned long long time_travel_ext_prev_request;
static unsigned long long *time_travel_ext_free_until;
static unsigned long long _time_travel_ext_free_until;
static u16 time_travel_shm_id;
static struct um_timetravel_schedshm *time_travel_shm;
static union um_timetravel_schedshm_client *time_travel_shm_client;

unsigned long tt_extra_sched_jiffies;

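/*
 * Note: this sched_clock() is only jiffies-granular. tt_extra_sched_jiffies
 * folds in scheduler ticks that were accounted while jiffies itself did not
 * advance; the timer event handlers below decrement it again on a real
 * clock tick to keep sched_clock() from jumping.
 */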
notrace unsigned long long sched_clock(void)
{
        return (unsigned long long)(jiffies - INITIAL_JIFFIES +
                                    tt_extra_sched_jiffies)
                * (NSEC_PER_SEC / HZ);
}

static void time_travel_set_time(unsigned long long ns)
{
        if (unlikely(ns < time_travel_time))
                panic("time-travel: time goes backwards %lld -> %lld\n",
                      time_travel_time, ns);
        else if (unlikely(ns >= S64_MAX))
                panic("The system was going to sleep forever, aborting");

        time_travel_time = ns;
}

enum time_travel_message_handling {
        TTMH_IDLE,              /* waiting in the idle loop */
        TTMH_POLL,              /* waiting (busy) outside the idle loop */
        TTMH_READ,              /* just read one message (e.g. an ACK) */
        TTMH_READ_START_ACK,    /* read the START ACK, which carries FDs */
};

static u64 bc_message;
int time_travel_should_print_bc_msg;

void _time_travel_print_bc_msg(void)
{
        time_travel_should_print_bc_msg = 0;
        printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message);
}

static void time_travel_setup_shm(int fd, u16 id)
{
        u32 len;

        time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm));

        if (!time_travel_shm)
                goto out;

        len = time_travel_shm->len;

        if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION ||
            len < struct_size(time_travel_shm, clients, id + 1)) {
                os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm));
                time_travel_shm = NULL;
                goto out;
        }

        time_travel_shm = os_mremap_rw_shared(time_travel_shm,
                                              sizeof(*time_travel_shm),
                                              len);
        if (!time_travel_shm)
                goto out;

        time_travel_shm_offset = time_travel_shm->current_time;
        time_travel_shm_client = &time_travel_shm->clients[id];
        time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE;
        time_travel_shm_id = id;
        /* always look at that free_until from now on */
        time_travel_ext_free_until = &time_travel_shm->free_until;
out:
        os_close_file(fd);
}
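
/*
 * Messages to/from the controller are fixed-size struct um_timetravel_msg,
 * exchanged over time_travel_ext_fd; incoming requests are answered with an
 * UM_TIMETRAVEL_ACK echoing the sequence number, except for RUN in
 * shared-memory mode, which is not acknowledged.
 */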
static void time_travel_handle_message(struct um_timetravel_msg *msg,
                                       enum time_travel_message_handling mode)
{
        struct um_timetravel_msg resp = {
                .op = UM_TIMETRAVEL_ACK,
        };
        int ret;

        /*
         * We can't unlock here, but interrupt signals with a timetravel_handler
         * (see um_request_irq_tt) get to the timetravel_handler anyway.
         */
        if (mode != TTMH_READ) {
                BUG_ON(mode == TTMH_IDLE && !irqs_disabled());

                while (os_poll(1, &time_travel_ext_fd) != 0) {
                        /* nothing */
                }
        }

        if (unlikely(mode == TTMH_READ_START_ACK)) {
                int fd[UM_TIMETRAVEL_SHARED_MAX_FDS];

                ret = os_rcv_fd_msg(time_travel_ext_fd, fd,
                                    ARRAY_SIZE(fd), msg, sizeof(*msg));
                if (ret == sizeof(*msg)) {
                        time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD],
                                              msg->time & UM_TIMETRAVEL_START_ACK_ID);
                        /* we don't use the logging for now */
                        os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]);
                }
        } else {
                ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
        }

        if (ret == 0)
                panic("time-travel external link is broken\n");
        if (ret != sizeof(*msg))
                panic("invalid time-travel message - %d bytes\n", ret);

        switch (msg->op) {
        default:
                WARN_ONCE(1, "time-travel: unexpected message %lld\n",
                          (unsigned long long)msg->op);
                break;
        case UM_TIMETRAVEL_ACK:
                return;
        case UM_TIMETRAVEL_RUN:
                time_travel_set_time(msg->time);
                if (time_travel_shm) {
                        /* no request right now since we're running */
                        time_travel_shm_client->flags &=
                                ~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
                        /* no ack for shared memory RUN */
                        return;
                }
                break;
        case UM_TIMETRAVEL_FREE_UNTIL:
                /* not supposed to get this with shm, but ignore it */
                if (time_travel_shm)
                        break;
                time_travel_ext_free_until = &_time_travel_ext_free_until;
                _time_travel_ext_free_until = msg->time;
                break;
        case UM_TIMETRAVEL_BROADCAST:
                bc_message = msg->time;
                time_travel_should_print_bc_msg = 1;
                break;
        }

        resp.seq = msg->seq;
        os_write_file(time_travel_ext_fd, &resp, sizeof(resp));
}

static u64 time_travel_ext_req(u32 op, u64 time)
{
        static int seq;
        int mseq = ++seq;
        struct um_timetravel_msg msg = {
                .op = op,
                .time = time,
                .seq = mseq,
        };

        /*
         * We need to block even the timetravel handlers of SIGIO here and
         * only restore their use when we got the ACK - otherwise we may
         * (will) get interrupted by that, try to queue the IRQ for future
         * processing and thus send another request while we're still waiting
         * for an ACK, but the peer doesn't know we got interrupted and will
         * send the ACKs in the same order as the messages, but we'd need to
         * see them in the opposite order ...
         *
         * This wouldn't matter *too* much, but some ACKs carry the
         * current time (for UM_TIMETRAVEL_GET) and getting another
         * ACK without a time would confuse us a lot!
         *
         * The sequence number assignment that happens here lets us
         * debug such message handling issues more easily.
         */
        block_signals_hard();
        os_write_file(time_travel_ext_fd, &msg, sizeof(msg));

        /* no ACK expected for WAIT in shared memory mode */
        if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm)
                goto done;

        while (msg.op != UM_TIMETRAVEL_ACK)
                time_travel_handle_message(&msg,
                                           op == UM_TIMETRAVEL_START ?
                                                TTMH_READ_START_ACK :
                                                TTMH_READ);

        if (msg.seq != mseq)
                panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
                      msg.op, msg.seq, mseq, msg.time);

        if (op == UM_TIMETRAVEL_GET)
                time_travel_set_time(msg.time);
done:
        unblock_signals_hard();

        return msg.time;
}
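
/*
 * Drivers use this to wait for an FD to become readable without leaving
 * the simulated-time world: while polling the caller's FD, any messages
 * arriving on the time-travel socket are serviced as well.
 */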
void __time_travel_wait_readable(int fd)
{
        int fds[2] = { fd, time_travel_ext_fd };
        int ret;

        if (time_travel_mode != TT_MODE_EXTERNAL)
                return;

        while ((ret = os_poll(2, fds))) {
                struct um_timetravel_msg msg;

                if (ret == 1)
                        time_travel_handle_message(&msg, TTMH_READ);
        }
}
EXPORT_SYMBOL_GPL(__time_travel_wait_readable);

static void time_travel_ext_update_request(unsigned long long time)
{
        if (time_travel_mode != TT_MODE_EXTERNAL)
                return;

        /* asked for exactly this time previously */
        if (time_travel_ext_prev_request_valid &&
            time == time_travel_ext_prev_request)
                return;

        /*
         * if we're running and are allowed to run past the request
         * then we don't need to update it either
         *
         * Note for shm we ignore FREE_UNTIL messages and leave the pointer
         * to shared memory, and for non-shm the offset is 0.
         */
        if (!time_travel_ext_waiting && time_travel_ext_free_until &&
            time < (*time_travel_ext_free_until - time_travel_shm_offset))
                return;

        time_travel_ext_prev_request = time;
        time_travel_ext_prev_request_valid = true;

        if (time_travel_shm) {
                union um_timetravel_schedshm_client *running;

                running = &time_travel_shm->clients[time_travel_shm->running_id];

                if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) {
                        time_travel_shm_client->flags |=
                                UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
                        time += time_travel_shm_offset;
                        time_travel_shm_client->req_time = time;
                        if (time < time_travel_shm->free_until)
                                time_travel_shm->free_until = time;
                        return;
                }
        }

        time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
}

void __time_travel_propagate_time(void)
{
        static unsigned long long last_propagated;

        if (time_travel_shm) {
                if (time_travel_shm->running_id != time_travel_shm_id)
                        panic("time-travel: setting time while not running\n");
                time_travel_shm->current_time = time_travel_time +
                                                time_travel_shm_offset;
                return;
        }

        if (last_propagated == time_travel_time)
                return;

        time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time);
        last_propagated = time_travel_time;
}
EXPORT_SYMBOL_GPL(__time_travel_propagate_time);

/* returns true if we must do a wait on the simtime device */
static bool time_travel_ext_request(unsigned long long time)
{
        /*
         * If we received an external sync point ("free until") then we
         * don't have to request/wait for anything until then, unless
         * we're already waiting.
         *
         * Note for shm we ignore FREE_UNTIL messages and leave the pointer
         * to shared memory, and for non-shm the offset is 0.
         */
        if (!time_travel_ext_waiting && time_travel_ext_free_until &&
            time < (*time_travel_ext_free_until - time_travel_shm_offset))
                return false;

        time_travel_ext_update_request(time);
        return true;
}

static void time_travel_ext_wait(bool idle)
{
        struct um_timetravel_msg msg = {
                .op = UM_TIMETRAVEL_ACK,
        };

        time_travel_ext_prev_request_valid = false;
        if (!time_travel_shm)
                time_travel_ext_free_until = NULL;
        time_travel_ext_waiting++;

        time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);

        /*
         * Here we are deep in the idle loop, so we have to break out of the
         * kernel abstraction in a sense and implement this in terms of the
         * UML system waiting on the VQ interrupt while sleeping; when we get
         * the signal it'll call time_travel_ext_vq_notify_done(), completing
         * the call.
         */
        while (msg.op != UM_TIMETRAVEL_RUN)
                time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL);

        time_travel_ext_waiting--;

        /* we might request more stuff while polling - reset when we run */
        time_travel_ext_prev_request_valid = false;
}

static void time_travel_ext_get_time(void)
{
        if (time_travel_shm)
                time_travel_set_time(time_travel_shm->current_time -
                                     time_travel_shm_offset);
        else
                time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
}

static void __time_travel_update_time(unsigned long long ns, bool idle)
{
        if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns))
                time_travel_ext_wait(idle);
        else
                time_travel_set_time(ns);
}

static struct time_travel_event *time_travel_first_event(void)
{
        return list_first_entry_or_null(&time_travel_events,
                                        struct time_travel_event,
                                        list);
}

static void __time_travel_add_event(struct time_travel_event *e,
                                    unsigned long long time)
{
        struct time_travel_event *tmp;
        bool inserted = false;
        unsigned long flags;

        if (e->pending)
                return;

        e->pending = true;
        e->time = time;

        local_irq_save(flags);
        list_for_each_entry(tmp, &time_travel_events, list) {
                /*
                 * Add the new entry before one with higher time,
                 * or if they're equal and both on stack, because
                 * in that case we need to unwind the stack in the
                 * right order, and the later event (timer sleep
                 * or such) must be dequeued first.
                 */
                if ((tmp->time > e->time) ||
                    (tmp->time == e->time && tmp->onstack && e->onstack)) {
                        list_add_tail(&e->list, &tmp->list);
                        inserted = true;
                        break;
                }
        }

        if (!inserted)
                list_add_tail(&e->list, &time_travel_events);

        tmp = time_travel_first_event();
        time_travel_ext_update_request(tmp->time);
        time_travel_next_event = tmp->time;
        local_irq_restore(flags);
}

static void time_travel_add_event(struct time_travel_event *e,
                                  unsigned long long time)
{
        if (WARN_ON(!e->fn))
                return;

        __time_travel_add_event(e, time);
}

void time_travel_add_event_rel(struct time_travel_event *e,
                               unsigned long long delay_ns)
{
        time_travel_add_event(e, time_travel_time + delay_ns);
}

static void time_travel_periodic_timer(struct time_travel_event *e)
{
        time_travel_add_event(&time_travel_timer_event,
                              time_travel_time + time_travel_timer_interval);

        /* clock tick; decrease extra jiffies by keeping sched_clock constant */
        if (tt_extra_sched_jiffies > 0)
                tt_extra_sched_jiffies -= 1;

        deliver_alarm();
}

void deliver_time_travel_irqs(void)
{
        struct time_travel_event *e;
        unsigned long flags;

        /*
         * Don't do anything for most cases. Note that because here we have
         * to disable IRQs (and re-enable later) we'll actually recurse at
         * the end of the function, so this is strictly necessary.
         */
        if (likely(list_empty(&time_travel_irqs)))
                return;

        local_irq_save(flags);
        irq_enter();
        while ((e = list_first_entry_or_null(&time_travel_irqs,
                                             struct time_travel_event,
                                             list))) {
                list_del(&e->list);
                e->pending = false;
                e->fn(e);
        }
        irq_exit();
        local_irq_restore(flags);
}

static void time_travel_deliver_event(struct time_travel_event *e)
{
        if (e == &time_travel_timer_event) {
                /*
                 * deliver_alarm() does the irq_enter/irq_exit
                 * by itself, so must handle it specially here
                 */
                e->fn(e);
        } else if (irqs_disabled()) {
                list_add_tail(&e->list, &time_travel_irqs);
                /*
                 * set pending again, it was set to false when the
                 * event was deleted from the original list, but
                 * now it's still pending until we deliver the IRQ.
                 */
                e->pending = true;
        } else {
                unsigned long flags;

                local_irq_save(flags);
                irq_enter();
                e->fn(e);
                irq_exit();
                local_irq_restore(flags);
        }
}

bool time_travel_del_event(struct time_travel_event *e)
{
        unsigned long flags;

        if (!e->pending)
                return false;
        local_irq_save(flags);
        list_del(&e->list);
        e->pending = false;
        local_irq_restore(flags);
        return true;
}

static void time_travel_update_time(unsigned long long next, bool idle)
{
        struct time_travel_event ne = {
                .onstack = true,
        };
        struct time_travel_event *e;
        bool finished = idle;

        /* add it without a handler - we deal with that specifically below */
        __time_travel_add_event(&ne, next);

        do {
                e = time_travel_first_event();

                BUG_ON(!e);
                __time_travel_update_time(e->time, idle);

                /* new events may have been inserted while we were waiting */
                if (e == time_travel_first_event()) {
                        BUG_ON(!time_travel_del_event(e));
                        BUG_ON(time_travel_time != e->time);

                        if (e == &ne) {
                                finished = true;
                        } else {
                                if (e->onstack)
                                        panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n",
                                              time_travel_time, e->time, e);
                                time_travel_deliver_event(e);
                        }
                }

                e = time_travel_first_event();
                if (e)
                        time_travel_ext_update_request(e->time);
        } while (ne.pending && !finished);

        time_travel_del_event(&ne);
}

static void time_travel_update_time_rel(unsigned long long offs)
{
        unsigned long flags;

        /*
         * Disable interrupts before calculating the new time so
         * that a real timer interrupt (signal) can't happen at
         * a bad time e.g. after we read time_travel_time but
         * before we've completed updating the time.
         */
        local_irq_save(flags);
        time_travel_update_time(time_travel_time + offs, false);
        local_irq_restore(flags);
}

void time_travel_ndelay(unsigned long nsec)
{
        /*
         * Not strictly needed to use _rel() version since this is
         * only used in INFCPU/EXT modes, but it doesn't hurt and
         * is more readable too.
         */
        time_travel_update_time_rel(nsec);
}
EXPORT_SYMBOL(time_travel_ndelay);

void time_travel_add_irq_event(struct time_travel_event *e)
{
        BUG_ON(time_travel_mode != TT_MODE_EXTERNAL);

        time_travel_ext_get_time();
        /*
         * We could model interrupt latency here, for now just
         * don't have any latency at all and request the exact
         * same time (again) to run the interrupt...
         */
        time_travel_add_event(e, time_travel_time);
}
EXPORT_SYMBOL_GPL(time_travel_add_irq_event);

static void time_travel_oneshot_timer(struct time_travel_event *e)
{
        /* clock tick; decrease extra jiffies by keeping sched_clock constant */
        if (tt_extra_sched_jiffies > 0)
                tt_extra_sched_jiffies -= 1;

        deliver_alarm();
}

void time_travel_sleep(void)
{
        /*
         * Wait "forever" (using S64_MAX because there are some potential
         * wrapping issues, especially with the current TT_MODE_EXTERNAL
         * controller application).
         */
        unsigned long long next = S64_MAX;

        if (time_travel_mode == TT_MODE_BASIC)
                os_timer_disable();

        time_travel_update_time(next, true);

        if (time_travel_mode == TT_MODE_BASIC &&
            time_travel_timer_event.pending) {
                if (time_travel_timer_event.fn == time_travel_periodic_timer) {
                        /*
                         * This is somewhat wrong - we should get the first
                         * one sooner like the os_timer_one_shot() below...
                         */
                        os_timer_set_interval(time_travel_timer_interval);
                } else {
                        os_timer_one_shot(time_travel_timer_event.time - next);
                }
        }
}

static void time_travel_handle_real_alarm(void)
{
        time_travel_set_time(time_travel_next_event);

        time_travel_del_event(&time_travel_timer_event);

        if (time_travel_timer_event.fn == time_travel_periodic_timer)
                time_travel_add_event(&time_travel_timer_event,
                                      time_travel_time +
                                      time_travel_timer_interval);
}

static void time_travel_set_interval(unsigned long long interval)
{
        time_travel_timer_interval = interval;
}
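
/*
 * The string passed via "time-travel=ext:" is "[<id>:]<socket path>",
 * e.g. "42:/tmp/tt.sock" (the path and ID here are purely illustrative);
 * the optional numeric ID is forwarded in the UM_TIMETRAVEL_START request.
 */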
static int time_travel_connect_external(const char *socket)
{
        const char *sep;
        unsigned long long id = (unsigned long long)-1;
        int rc;

        if ((sep = strchr(socket, ':'))) {
                char buf[25] = {};

                if (sep - socket > sizeof(buf) - 1)
                        goto invalid_number;

                memcpy(buf, socket, sep - socket);
                if (kstrtoull(buf, 0, &id)) {
invalid_number:
                        panic("time-travel: invalid external ID in string '%s'\n",
                              socket);
                        return -EINVAL;
                }

                socket = sep + 1;
        }

        rc = os_connect_socket(socket);
        if (rc < 0) {
                panic("time-travel: failed to connect to external socket %s\n",
                      socket);
                return rc;
        }

        time_travel_ext_fd = rc;

        time_travel_ext_req(UM_TIMETRAVEL_START, id);

        return 1;
}

static void time_travel_set_start(void)
{
        if (time_travel_start_set)
                return;

        switch (time_travel_mode) {
        case TT_MODE_EXTERNAL:
                time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
                /* controller gave us the *current* time, so adjust by that */
                time_travel_ext_get_time();
                time_travel_start -= time_travel_time;
                break;
        case TT_MODE_INFCPU:
        case TT_MODE_BASIC:
                if (!time_travel_start_set)
                        time_travel_start = os_persistent_clock_emulation();
                break;
        case TT_MODE_OFF:
                /* we just read the host clock with os_persistent_clock_emulation() */
                break;
        }

        time_travel_start_set = true;
}
#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
#define time_travel_start_set 0
#define time_travel_start 0
#define time_travel_time 0
#define time_travel_ext_waiting 0

static inline void time_travel_update_time(unsigned long long ns, bool idle)
{
}

static inline void time_travel_update_time_rel(unsigned long long offs)
{
}

static inline void time_travel_handle_real_alarm(void)
{
}

static void time_travel_set_interval(unsigned long long interval)
{
}

static inline void time_travel_set_start(void)
{
}

/* fail link if this actually gets used */
extern u64 time_travel_ext_req(u32 op, u64 time);

/* these are empty macros so the struct/fn need not exist */
#define time_travel_add_event(e, time) do { } while (0)
/* externally not usable - redefine here so we can */
#undef time_travel_del_event
#define time_travel_del_event(e) do { } while (0)
#endif

void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
        unsigned long flags;

        /*
         * In basic time-travel mode we still get real interrupts
         * (signals) but since we don't read time from the OS, we
         * must update the simulated time here to the expiry when
         * we get a signal.
         * This is not the case in inf-cpu mode, since there we
         * never get any real signals from the OS.
         */
        if (time_travel_mode == TT_MODE_BASIC)
                time_travel_handle_real_alarm();

        local_irq_save(flags);
        do_IRQ(TIMER_IRQ, regs);
        local_irq_restore(flags);
}

static int itimer_shutdown(struct clock_event_device *evt)
{
        if (time_travel_mode != TT_MODE_OFF)
                time_travel_del_event(&time_travel_timer_event);

        if (time_travel_mode != TT_MODE_INFCPU &&
            time_travel_mode != TT_MODE_EXTERNAL)
                os_timer_disable();

        return 0;
}

static int itimer_set_periodic(struct clock_event_device *evt)
{
        unsigned long long interval = NSEC_PER_SEC / HZ;

        if (time_travel_mode != TT_MODE_OFF) {
                time_travel_del_event(&time_travel_timer_event);
                time_travel_set_event_fn(&time_travel_timer_event,
                                         time_travel_periodic_timer);
                time_travel_set_interval(interval);
                time_travel_add_event(&time_travel_timer_event,
                                      time_travel_time + interval);
        }

        if (time_travel_mode != TT_MODE_INFCPU &&
            time_travel_mode != TT_MODE_EXTERNAL)
                os_timer_set_interval(interval);

        return 0;
}

static int itimer_next_event(unsigned long delta,
                             struct clock_event_device *evt)
{
        delta += 1;

        if (time_travel_mode != TT_MODE_OFF) {
                time_travel_del_event(&time_travel_timer_event);
                time_travel_set_event_fn(&time_travel_timer_event,
                                         time_travel_oneshot_timer);
                time_travel_add_event(&time_travel_timer_event,
                                      time_travel_time + delta);
        }

        if (time_travel_mode != TT_MODE_INFCPU &&
            time_travel_mode != TT_MODE_EXTERNAL)
                return os_timer_one_shot(delta);

        return 0;
}

static int itimer_one_shot(struct clock_event_device *evt)
{
        return itimer_next_event(0, evt);
}

static struct clock_event_device timer_clockevent = {
        .name                   = "posix-timer",
        .rating                 = 250,
        .cpumask                = cpu_possible_mask,
        .features               = CLOCK_EVT_FEAT_PERIODIC |
                                  CLOCK_EVT_FEAT_ONESHOT,
        .set_state_shutdown     = itimer_shutdown,
        .set_state_periodic     = itimer_set_periodic,
        .set_state_oneshot      = itimer_one_shot,
        .set_next_event         = itimer_next_event,
        .shift                  = 0,
        .max_delta_ns           = 0xffffffff,
        .max_delta_ticks        = 0xffffffff,
        .min_delta_ns           = TIMER_MIN_DELTA,
        .min_delta_ticks        = TIMER_MIN_DELTA, // microsecond resolution should be enough for anyone, same as 640K RAM
        .irq                    = 0,
        .mult                   = 1,
};
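
/*
 * With .mult = 1 and .shift = 0 the clockevent "ticks" are plain
 * nanoseconds, so the delta passed to itimer_next_event() needs no
 * conversion.
 */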
static irqreturn_t um_timer(int irq, void *dev)
{
        if (get_current()->mm != NULL) {
                /* userspace - relay signal, results in correct userspace timers */
                os_alarm_process(get_current()->mm->context.id.pid);
        }

        (*timer_clockevent.event_handler)(&timer_clockevent);

        return IRQ_HANDLED;
}

static u64 timer_read(struct clocksource *cs)
{
        if (time_travel_mode != TT_MODE_OFF) {
                /*
                 * We make reading the timer cost a bit so that we don't get
                 * stuck in loops that expect time to move more than the
                 * exact requested sleep amount, e.g. python's socket server,
                 * see https://bugs.python.org/issue37026.
                 *
                 * However, don't do that when we're in interrupt or such as
                 * then we might recurse into our own processing, and get to
                 * even more waiting, and that's not good - it messes up the
                 * "what do I do next" and onstack event we use to know when
                 * to return from time_travel_update_time().
                 */
                if (!irqs_disabled() && !in_interrupt() && !in_softirq() &&
                    !time_travel_ext_waiting)
                        time_travel_update_time_rel(TIMER_MULTIPLIER);
                return time_travel_time / TIMER_MULTIPLIER;
        }

        return os_nsecs() / TIMER_MULTIPLIER;
}

static struct clocksource timer_clocksource = {
        .name           = "timer",
        .rating         = 300,
        .read           = timer_read,
        .mask           = CLOCKSOURCE_MASK(64),
        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
};

static void __init um_timer_setup(void)
{
        int err;

        err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL);
        if (err != 0)
                printk(KERN_ERR "register_timer : request_irq failed - errno = %d\n",
                       -err);

        err = os_timer_create();
        if (err != 0) {
                printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
                return;
        }

        err = clocksource_register_hz(&timer_clocksource, NSEC_PER_SEC/TIMER_MULTIPLIER);
        if (err) {
                printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
                return;
        }
        clockevents_register_device(&timer_clockevent);
}

void read_persistent_clock64(struct timespec64 *ts)
{
        long long nsecs;

        time_travel_set_start();

        if (time_travel_mode != TT_MODE_OFF)
                nsecs = time_travel_start + time_travel_time;
        else
                nsecs = os_persistent_clock_emulation();

        set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
                                  nsecs % NSEC_PER_SEC);
}

void __init time_init(void)
{
        timer_set_signal_handler();
        late_time_init = um_timer_setup;
}

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
unsigned long calibrate_delay_is_known(void)
{
        if (time_travel_mode == TT_MODE_INFCPU ||
            time_travel_mode == TT_MODE_EXTERNAL)
                return 1;
        return 0;
}

static int setup_time_travel(char *str)
{
        if (strcmp(str, "=inf-cpu") == 0) {
                time_travel_mode = TT_MODE_INFCPU;
                timer_clockevent.name = "time-travel-timer-infcpu";
                timer_clocksource.name = "time-travel-clock";
                return 1;
        }

        if (strncmp(str, "=ext:", 5) == 0) {
                time_travel_mode = TT_MODE_EXTERNAL;
                timer_clockevent.name = "time-travel-timer-external";
                timer_clocksource.name = "time-travel-clock-external";
                return time_travel_connect_external(str + 5);
        }

        if (!*str) {
                time_travel_mode = TT_MODE_BASIC;
                timer_clockevent.name = "time-travel-timer";
                timer_clocksource.name = "time-travel-clock";
                return 1;
        }

        return -EINVAL;
}

__setup("time-travel", setup_time_travel);
__uml_help(setup_time_travel,
"time-travel\n"
"This option just enables basic time travel mode, in which the clock/timers\n"
"inside the UML instance skip forward when there's nothing to do, rather than\n"
"waiting for real time to elapse. However, instance CPU speed is limited by\n"
"the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
"clock (but quicker when there's nothing to do).\n"
"\n"
"time-travel=inf-cpu\n"
"This enables time travel mode with infinite processing power, in which there\n"
"are no wall clock timers, and any CPU processing happens - as seen from the\n"
"guest - instantly. This can be useful for accurate simulation regardless of\n"
"debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
"easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
"\n"
"time-travel=ext:[ID:]/path/to/socket\n"
"This enables time travel mode similar to =inf-cpu, except the system will\n"
"use the given socket to coordinate with a central scheduler, in order to\n"
"have more than one system simultaneously be on simulated time. The virtio\n"
"driver code in UML knows about this so you can also simulate networks and\n"
"devices using it, assuming the device has the right capabilities.\n"
"The optional ID is a 64-bit integer that's sent to the central scheduler.\n");
static int setup_time_travel_start(char *str)
{
        int err;

        err = kstrtoull(str, 0, &time_travel_start);
        if (err)
                return err;

        time_travel_start_set = 1;
        return 1;
}

__setup("time-travel-start=", setup_time_travel_start);
__uml_help(setup_time_travel_start,
"time-travel-start=<nanoseconds>\n"
"Configure the UML instance's wall clock to start at this value rather than\n"
"the host's wall clock at the time of UML boot.\n");
static struct kobject *bc_time_kobject;

static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
        return sprintf(buf, "0x%llx", bc_message);
}

static ssize_t bc_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count)
{
        int ret;
        u64 user_bc_message;

        ret = kstrtou64(buf, 0, &user_bc_message);
        if (ret)
                return ret;

        bc_message = user_bc_message;

        time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, bc_message);
        pr_info("um: time: sent broadcast message: 0x%llx\n", bc_message);
        return count;
}

static struct kobj_attribute bc_attribute = __ATTR(bc-message, 0660, bc_show, bc_store);
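
/*
 * Usage sketch (external mode only): the attribute appears as
 * /sys/kernel/um-ext-time/bc-message, so e.g.
 *   echo 0x1234 > /sys/kernel/um-ext-time/bc-message
 * sends a broadcast to the other simulation participants, while reading
 * the file returns the most recent broadcast value (sent or received).
 */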
static int __init um_bc_start(void)
{
        if (time_travel_mode != TT_MODE_EXTERNAL)
                return 0;

        bc_time_kobject = kobject_create_and_add("um-ext-time", kernel_kobj);
        if (!bc_time_kobject)
                return 0;

        if (sysfs_create_file(bc_time_kobject, &bc_attribute.attr))
                pr_debug("failed to create the bc file in /sys/kernel/um-ext-time\n");

        return 0;
}
late_initcall(um_bc_start);
#endif