// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Anton Ivanov (aivanov@{brocade.com,kot-begemot.co.uk})
 * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
 * Copyright (C) 2012-2014 Cisco Systems
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Copyright (C) 2019 Intel Corporation
 */

#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <asm/irq.h>
#include <asm/param.h>
#include <kern_util.h>
#include <os.h>
#include <linux/delay.h>
#include <linux/time-internal.h>
#include <linux/um_timetravel.h>
#include <shared/init.h>

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
#include <linux/sched/clock.h>

enum time_travel_mode time_travel_mode;
EXPORT_SYMBOL_GPL(time_travel_mode);

static bool time_travel_start_set;
static unsigned long long time_travel_start;
static unsigned long long time_travel_time;
static unsigned long long time_travel_shm_offset;
static LIST_HEAD(time_travel_events);
static LIST_HEAD(time_travel_irqs);
static unsigned long long time_travel_timer_interval;
static unsigned long long time_travel_next_event;
static struct time_travel_event time_travel_timer_event;
static int time_travel_ext_fd = -1;
static unsigned int time_travel_ext_waiting;
static bool time_travel_ext_prev_request_valid;
static unsigned long long time_travel_ext_prev_request;
static unsigned long long *time_travel_ext_free_until;
static unsigned long long _time_travel_ext_free_until;
static u16 time_travel_shm_id;
static struct um_timetravel_schedshm *time_travel_shm;
static union um_timetravel_schedshm_client *time_travel_shm_client;

unsigned long tt_extra_sched_jiffies;

notrace unsigned long long sched_clock(void)
{
	return (unsigned long long)(jiffies - INITIAL_JIFFIES +
				    tt_extra_sched_jiffies)
		* (NSEC_PER_SEC / HZ);
}

static void time_travel_set_time(unsigned long long ns)
{
	if (unlikely(ns < time_travel_time))
		panic("time-travel: time goes backwards %lld -> %lld\n",
		      time_travel_time, ns);
	else if (unlikely(ns >= S64_MAX))
		panic("The system was going to sleep forever, aborting");

	time_travel_time = ns;
}

enum time_travel_message_handling {
	TTMH_IDLE,
	TTMH_POLL,
	TTMH_READ,
	TTMH_READ_START_ACK,
};

static u64 bc_message;
int time_travel_should_print_bc_msg;

void _time_travel_print_bc_msg(void)
{
	time_travel_should_print_bc_msg = 0;
	printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message);
}

static void time_travel_setup_shm(int fd, u16 id)
{
	u32 len;

	time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm));

	if (!time_travel_shm)
		goto out;

	len = time_travel_shm->len;

	if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION ||
	    len < struct_size(time_travel_shm, clients, id + 1)) {
		os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm));
		time_travel_shm = NULL;
		goto out;
	}

	time_travel_shm = os_mremap_rw_shared(time_travel_shm,
					      sizeof(*time_travel_shm),
					      len);
	if (!time_travel_shm)
		goto out;

	time_travel_shm_offset = time_travel_shm->current_time;
	time_travel_shm_client = &time_travel_shm->clients[id];
	time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE;
	time_travel_shm_id = id;
	/* always look at that free_until from now on */
	time_travel_ext_free_until = &time_travel_shm->free_until;
out:
	os_close_file(fd);
}
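
/*
 * Protocol note (summary of the code below): every struct um_timetravel_msg
 * read from time_travel_ext_fd is acknowledged by writing back a message
 * with .op = UM_TIMETRAVEL_ACK carrying the sender's sequence number. The
 * two exceptions are incoming ACKs themselves and, in shared-memory mode,
 * UM_TIMETRAVEL_RUN, which is intentionally not acked.
 */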
static void time_travel_handle_message(struct um_timetravel_msg *msg,
				       enum time_travel_message_handling mode)
{
	struct um_timetravel_msg resp = {
		.op = UM_TIMETRAVEL_ACK,
	};
	int ret;

	/*
	 * We can't unlock here, but interrupt signals with a timetravel_handler
	 * (see um_request_irq_tt) get to the timetravel_handler anyway.
	 */
	if (mode != TTMH_READ) {
		BUG_ON(mode == TTMH_IDLE && !irqs_disabled());

		while (os_poll(1, &time_travel_ext_fd) != 0) {
			/* nothing */
		}
	}

	if (unlikely(mode == TTMH_READ_START_ACK)) {
		int fd[UM_TIMETRAVEL_SHARED_MAX_FDS];

		ret = os_rcv_fd_msg(time_travel_ext_fd, fd,
				    ARRAY_SIZE(fd), msg, sizeof(*msg));
		if (ret == sizeof(*msg)) {
			time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD],
					      msg->time & UM_TIMETRAVEL_START_ACK_ID);
			/* we don't use the logging for now */
			os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]);
		}
	} else {
		ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
	}

	if (ret == 0)
		panic("time-travel external link is broken\n");
	if (ret != sizeof(*msg))
		panic("invalid time-travel message - %d bytes\n", ret);

	switch (msg->op) {
	default:
		WARN_ONCE(1, "time-travel: unexpected message %lld\n",
			  (unsigned long long)msg->op);
		break;
	case UM_TIMETRAVEL_ACK:
		return;
	case UM_TIMETRAVEL_RUN:
		time_travel_set_time(msg->time);
		if (time_travel_shm) {
			/* no request right now since we're running */
			time_travel_shm_client->flags &=
				~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
			/* no ack for shared memory RUN */
			return;
		}
		break;
	case UM_TIMETRAVEL_FREE_UNTIL:
		/* not supposed to get this with shm, but ignore it */
		if (time_travel_shm)
			break;
		time_travel_ext_free_until = &_time_travel_ext_free_until;
		_time_travel_ext_free_until = msg->time;
		break;
	case UM_TIMETRAVEL_BROADCAST:
		bc_message = msg->time;
		time_travel_should_print_bc_msg = 1;
		break;
	}

	resp.seq = msg->seq;
	os_write_file(time_travel_ext_fd, &resp, sizeof(resp));
}

static u64 time_travel_ext_req(u32 op, u64 time)
{
	static int seq;
	int mseq = ++seq;
	struct um_timetravel_msg msg = {
		.op = op,
		.time = time,
		.seq = mseq,
	};

	/*
	 * We need to block even the timetravel handlers of SIGIO here and
	 * only restore them once we get the ACK - otherwise we may
	 * (will) get interrupted by that, try to queue the IRQ for future
	 * processing and thus send another request while we're still waiting
	 * for an ACK, but the peer doesn't know we got interrupted and will
	 * send the ACKs in the same order as the messages, while we'd need
	 * to see them in the opposite order ...
	 *
	 * This wouldn't matter *too* much, but some ACKs carry the
	 * current time (for UM_TIMETRAVEL_GET) and getting another
	 * ACK without a time would confuse us a lot!
	 *
	 * The sequence number assignment that happens here lets us
	 * debug such message handling issues more easily.
	 */
	block_signals_hard();
	os_write_file(time_travel_ext_fd, &msg, sizeof(msg));

	/* no ACK expected for WAIT in shared memory mode */
	if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm)
		goto done;

	while (msg.op != UM_TIMETRAVEL_ACK)
		time_travel_handle_message(&msg,
					   op == UM_TIMETRAVEL_START ?
						TTMH_READ_START_ACK :
						TTMH_READ);

	if (msg.seq != mseq)
		panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
		      msg.op, msg.seq, mseq, msg.time);

	if (op == UM_TIMETRAVEL_GET)
		time_travel_set_time(msg.time);
done:
	unblock_signals_hard();

	return msg.time;
}
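
/*
 * Illustrative request/ACK exchange with the controller (sketch, the
 * field values are made up):
 *	->  { .op = UM_TIMETRAVEL_REQUEST, .time = 1000000, .seq = 7 }
 *	<-  { .op = UM_TIMETRAVEL_ACK,     .time = 0,       .seq = 7 }
 * Any RUN/FREE_UNTIL/BROADCAST message that arrives before the ACK is
 * consumed and acknowledged by time_travel_handle_message() above.
 */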
void __time_travel_wait_readable(int fd)
{
	int fds[2] = { fd, time_travel_ext_fd };
	int ret;

	if (time_travel_mode != TT_MODE_EXTERNAL)
		return;

	while ((ret = os_poll(2, fds))) {
		struct um_timetravel_msg msg;

		if (ret == 1)
			time_travel_handle_message(&msg, TTMH_READ);
	}
}
EXPORT_SYMBOL_GPL(__time_travel_wait_readable);

static void time_travel_ext_update_request(unsigned long long time)
{
	if (time_travel_mode != TT_MODE_EXTERNAL)
		return;

	/* asked for exactly this time previously */
	if (time_travel_ext_prev_request_valid &&
	    time == time_travel_ext_prev_request)
		return;

	/*
	 * if we're running and are allowed to run past the request
	 * then we don't need to update it either
	 *
	 * Note for shm we ignore FREE_UNTIL messages and leave the pointer
	 * to shared memory, and for non-shm the offset is 0.
	 */
	if (!time_travel_ext_waiting && time_travel_ext_free_until &&
	    time < (*time_travel_ext_free_until - time_travel_shm_offset))
		return;

	time_travel_ext_prev_request = time;
	time_travel_ext_prev_request_valid = true;

	if (time_travel_shm) {
		union um_timetravel_schedshm_client *running;

		running = &time_travel_shm->clients[time_travel_shm->running_id];

		if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) {
			time_travel_shm_client->flags |=
				UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
			time += time_travel_shm_offset;
			time_travel_shm_client->req_time = time;
			if (time < time_travel_shm->free_until)
				time_travel_shm->free_until = time;
			return;
		}
	}

	time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
}

void __time_travel_propagate_time(void)
{
	static unsigned long long last_propagated;

	if (time_travel_shm) {
		if (time_travel_shm->running_id != time_travel_shm_id)
			panic("time-travel: setting time while not running\n");
		time_travel_shm->current_time = time_travel_time +
						time_travel_shm_offset;
		return;
	}

	if (last_propagated == time_travel_time)
		return;

	time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time);
	last_propagated = time_travel_time;
}
EXPORT_SYMBOL_GPL(__time_travel_propagate_time);

/* returns true if we must do a wait to the simtime device */
static bool time_travel_ext_request(unsigned long long time)
{
	/*
	 * If we received an external sync point ("free until") then we
	 * don't have to request/wait for anything until then, unless
	 * we're already waiting.
	 *
	 * Note for shm we ignore FREE_UNTIL messages and leave the pointer
	 * to shared memory, and for non-shm the offset is 0.
	 */
	if (!time_travel_ext_waiting && time_travel_ext_free_until &&
	    time < (*time_travel_ext_free_until - time_travel_shm_offset))
		return false;

	time_travel_ext_update_request(time);
	return true;
}
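
/*
 * Block until the controller lets us run again: send UM_TIMETRAVEL_WAIT
 * (no ACK is expected for it in shared-memory mode) and then process
 * incoming messages until one of them is UM_TIMETRAVEL_RUN.
 */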
static void time_travel_ext_wait(bool idle)
{
	struct um_timetravel_msg msg = {
		.op = UM_TIMETRAVEL_ACK,
	};

	time_travel_ext_prev_request_valid = false;
	if (!time_travel_shm)
		time_travel_ext_free_until = NULL;
	time_travel_ext_waiting++;

	time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);

	/*
	 * Here we are deep in the idle loop, so we have to break out of the
	 * kernel abstraction in a sense and implement this in terms of the
	 * UML system waiting on the VQ interrupt while sleeping, when we get
	 * the signal it'll call time_travel_ext_vq_notify_done() completing the
	 * call.
	 */
	while (msg.op != UM_TIMETRAVEL_RUN)
		time_travel_handle_message(&msg, idle ? TTMH_IDLE : TTMH_POLL);

	time_travel_ext_waiting--;

	/* we might request more stuff while polling - reset when we run */
	time_travel_ext_prev_request_valid = false;
}

static void time_travel_ext_get_time(void)
{
	if (time_travel_shm)
		time_travel_set_time(time_travel_shm->current_time -
				     time_travel_shm_offset);
	else
		time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
}

static void __time_travel_update_time(unsigned long long ns, bool idle)
{
	if (time_travel_mode == TT_MODE_EXTERNAL && time_travel_ext_request(ns))
		time_travel_ext_wait(idle);
	else
		time_travel_set_time(ns);
}

static struct time_travel_event *time_travel_first_event(void)
{
	return list_first_entry_or_null(&time_travel_events,
					struct time_travel_event,
					list);
}

static void __time_travel_add_event(struct time_travel_event *e,
				    unsigned long long time)
{
	struct time_travel_event *tmp;
	bool inserted = false;
	unsigned long flags;

	if (e->pending)
		return;

	e->pending = true;
	e->time = time;

	local_irq_save(flags);
	list_for_each_entry(tmp, &time_travel_events, list) {
		/*
		 * Add the new entry before one with higher time,
		 * or if they're equal and both on stack, because
		 * in that case we need to unwind the stack in the
		 * right order, and the later event (timer sleep
		 * or such) must be dequeued first.
		 */
		if ((tmp->time > e->time) ||
		    (tmp->time == e->time && tmp->onstack && e->onstack)) {
			list_add_tail(&e->list, &tmp->list);
			inserted = true;
			break;
		}
	}

	if (!inserted)
		list_add_tail(&e->list, &time_travel_events);

	tmp = time_travel_first_event();
	time_travel_ext_update_request(tmp->time);
	time_travel_next_event = tmp->time;
	local_irq_restore(flags);
}

static void time_travel_add_event(struct time_travel_event *e,
				  unsigned long long time)
{
	if (WARN_ON(!e->fn))
		return;

	__time_travel_add_event(e, time);
}

void time_travel_add_event_rel(struct time_travel_event *e,
			       unsigned long long delay_ns)
{
	time_travel_add_event(e, time_travel_time + delay_ns);
}
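
/*
 * Usage sketch for the event API above (illustrative only, 'my_event'
 * and 'my_handler' are hypothetical names for built-in code):
 *
 *	static void my_handler(struct time_travel_event *e) { ... }
 *	static struct time_travel_event my_event;
 *
 *	time_travel_set_event_fn(&my_event, my_handler);
 *	time_travel_add_event_rel(&my_event, 500 * NSEC_PER_USEC);
 *
 * my_handler() then runs once the simulated clock reaches
 * time_travel_time + 500us.
 */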
static void time_travel_periodic_timer(struct time_travel_event *e)
{
	time_travel_add_event(&time_travel_timer_event,
			      time_travel_time + time_travel_timer_interval);

	/* clock tick; decrease extra jiffies by keeping sched_clock constant */
	if (tt_extra_sched_jiffies > 0)
		tt_extra_sched_jiffies -= 1;

	deliver_alarm();
}

void deliver_time_travel_irqs(void)
{
	struct time_travel_event *e;
	unsigned long flags;

	/*
	 * Don't do anything for most cases. Note that because here we have
	 * to disable IRQs (and re-enable later) we'll actually recurse at
	 * the end of the function, so this is strictly necessary.
	 */
	if (likely(list_empty(&time_travel_irqs)))
		return;

	local_irq_save(flags);
	irq_enter();
	while ((e = list_first_entry_or_null(&time_travel_irqs,
					     struct time_travel_event,
					     list))) {
		list_del(&e->list);
		e->pending = false;
		e->fn(e);
	}
	irq_exit();
	local_irq_restore(flags);
}

static void time_travel_deliver_event(struct time_travel_event *e)
{
	if (e == &time_travel_timer_event) {
		/*
		 * deliver_alarm() does the irq_enter/irq_exit
		 * by itself, so must handle it specially here
		 */
		e->fn(e);
	} else if (irqs_disabled()) {
		list_add_tail(&e->list, &time_travel_irqs);
		/*
		 * set pending again, it was set to false when the
		 * event was deleted from the original list, but
		 * now it's still pending until we deliver the IRQ.
		 */
		e->pending = true;
	} else {
		unsigned long flags;

		local_irq_save(flags);
		irq_enter();
		e->fn(e);
		irq_exit();
		local_irq_restore(flags);
	}
}

bool time_travel_del_event(struct time_travel_event *e)
{
	unsigned long flags;

	if (!e->pending)
		return false;
	local_irq_save(flags);
	list_del(&e->list);
	e->pending = false;
	local_irq_restore(flags);
	return true;
}

static void time_travel_update_time(unsigned long long next, bool idle)
{
	struct time_travel_event ne = {
		.onstack = true,
	};
	struct time_travel_event *e;
	bool finished = idle;

	/* add it without a handler - we deal with that specifically below */
	__time_travel_add_event(&ne, next);

	do {
		e = time_travel_first_event();

		BUG_ON(!e);
		__time_travel_update_time(e->time, idle);

		/* new events may have been inserted while we were waiting */
		if (e == time_travel_first_event()) {
			BUG_ON(!time_travel_del_event(e));
			BUG_ON(time_travel_time != e->time);

			if (e == &ne) {
				finished = true;
			} else {
				if (e->onstack)
					panic("On-stack event dequeued outside of the stack! time=%lld, event time=%lld, event=%pS\n",
					      time_travel_time, e->time, e);
				time_travel_deliver_event(e);
			}
		}

		e = time_travel_first_event();
		if (e)
			time_travel_ext_update_request(e->time);
	} while (ne.pending && !finished);

	time_travel_del_event(&ne);
}
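
/*
 * Note on the loop above: the on-stack event 'ne' acts as a sentinel at
 * 'next'. Events queued before it are delivered in time order, and
 * dequeuing the sentinel itself terminates the loop with the simulated
 * clock at the requested time; when called for idle/sleep, 'finished'
 * starts out true, so the loop body runs only once and we wake at the
 * first pending event instead.
 */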
static void time_travel_update_time_rel(unsigned long long offs)
{
	unsigned long flags;

	/*
	 * Disable interrupts before calculating the new time so
	 * that a real timer interrupt (signal) can't happen at
	 * a bad time e.g. after we read time_travel_time but
	 * before we've completed updating the time.
	 */
	local_irq_save(flags);
	time_travel_update_time(time_travel_time + offs, false);
	local_irq_restore(flags);
}

void time_travel_ndelay(unsigned long nsec)
{
	/*
	 * Not strictly needed to use _rel() version since this is
	 * only used in INFCPU/EXT modes, but it doesn't hurt and
	 * is more readable too.
	 */
	time_travel_update_time_rel(nsec);
}
EXPORT_SYMBOL(time_travel_ndelay);

void time_travel_add_irq_event(struct time_travel_event *e)
{
	BUG_ON(time_travel_mode != TT_MODE_EXTERNAL);

	time_travel_ext_get_time();
	/*
	 * We could model interrupt latency here, for now just
	 * don't have any latency at all and request the exact
	 * same time (again) to run the interrupt...
	 */
	time_travel_add_event(e, time_travel_time);
}
EXPORT_SYMBOL_GPL(time_travel_add_irq_event);

static void time_travel_oneshot_timer(struct time_travel_event *e)
{
	/* clock tick; decrease extra jiffies by keeping sched_clock constant */
	if (tt_extra_sched_jiffies > 0)
		tt_extra_sched_jiffies -= 1;

	deliver_alarm();
}

void time_travel_sleep(void)
{
	/*
	 * Wait "forever" (using S64_MAX because there are some potential
	 * wrapping issues, especially with the current TT_MODE_EXTERNAL
	 * controller application).
	 */
	unsigned long long next = S64_MAX;
	int cpu = raw_smp_processor_id();

	if (time_travel_mode == TT_MODE_BASIC)
		os_timer_disable(cpu);

	time_travel_update_time(next, true);

	if (time_travel_mode == TT_MODE_BASIC &&
	    time_travel_timer_event.pending) {
		if (time_travel_timer_event.fn == time_travel_periodic_timer) {
			/*
			 * This is somewhat wrong - we should get the first
			 * one sooner like the os_timer_one_shot() below...
			 */
			os_timer_set_interval(cpu, time_travel_timer_interval);
		} else {
			os_timer_one_shot(cpu, time_travel_timer_event.time - next);
		}
	}
}
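
/*
 * Called from timer_handler() in TT_MODE_BASIC: a real host timer signal
 * arrived, so jump the simulated clock to the next queued event's time
 * (the programmed expiry) and re-arm the event if the timer is periodic.
 */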
static void time_travel_handle_real_alarm(void)
{
	time_travel_set_time(time_travel_next_event);

	time_travel_del_event(&time_travel_timer_event);

	if (time_travel_timer_event.fn == time_travel_periodic_timer)
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time +
				      time_travel_timer_interval);
}

static void time_travel_set_interval(unsigned long long interval)
{
	time_travel_timer_interval = interval;
}

static int time_travel_connect_external(const char *socket)
{
	const char *sep;
	unsigned long long id = (unsigned long long)-1;
	int rc;

	if ((sep = strchr(socket, ':'))) {
		char buf[25] = {};
		if (sep - socket > sizeof(buf) - 1)
			goto invalid_number;

		memcpy(buf, socket, sep - socket);
		if (kstrtoull(buf, 0, &id)) {
invalid_number:
			panic("time-travel: invalid external ID in string '%s'\n",
			      socket);
			return -EINVAL;
		}

		socket = sep + 1;
	}

	rc = os_connect_socket(socket);
	if (rc < 0) {
		panic("time-travel: failed to connect to external socket %s\n",
		      socket);
		return rc;
	}

	time_travel_ext_fd = rc;

	time_travel_ext_req(UM_TIMETRAVEL_START, id);

	return 1;
}

static void time_travel_set_start(void)
{
	if (time_travel_start_set)
		return;

	switch (time_travel_mode) {
	case TT_MODE_EXTERNAL:
		time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
		/* controller gave us the *current* time, so adjust by that */
		time_travel_ext_get_time();
		time_travel_start -= time_travel_time;
		break;
	case TT_MODE_INFCPU:
	case TT_MODE_BASIC:
		if (!time_travel_start_set)
			time_travel_start = os_persistent_clock_emulation();
		break;
	case TT_MODE_OFF:
		/* we just read the host clock with os_persistent_clock_emulation() */
		break;
	}

	time_travel_start_set = true;
}
#else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
#define time_travel_start_set 0
#define time_travel_start 0
#define time_travel_time 0
#define time_travel_ext_waiting 0

static inline void time_travel_update_time(unsigned long long ns, bool idle)
{
}

static inline void time_travel_update_time_rel(unsigned long long offs)
{
}

static inline void time_travel_handle_real_alarm(void)
{
}

static void time_travel_set_interval(unsigned long long interval)
{
}

static inline void time_travel_set_start(void)
{
}

/* fail link if this actually gets used */
extern u64 time_travel_ext_req(u32 op, u64 time);

/* these are empty macros so the struct/fn need not exist */
#define time_travel_add_event(e, time) do { } while (0)
/* externally not usable - redefine here so we can */
#undef time_travel_del_event
#define time_travel_del_event(e) do { } while (0)
#endif

static struct clock_event_device timer_clockevent[NR_CPUS];

void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
	unsigned long flags;

	/*
	 * In basic time-travel mode we still get real interrupts
	 * (signals) but since we don't read time from the OS, we
	 * must update the simulated time here to the expiry when
	 * we get a signal.
	 * This is not the case in inf-cpu mode, since there we
	 * never get any real signals from the OS.
	 */
	if (time_travel_mode == TT_MODE_BASIC)
		time_travel_handle_real_alarm();

	local_irq_save(flags);
	do_IRQ(TIMER_IRQ, regs);
	local_irq_restore(flags);
}
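
/*
 * The itimer_*() clockevents callbacks below keep the time-travel timer
 * event in sync (in any time-travel mode) and additionally program the
 * per-CPU host posix timer, except in the inf-cpu/external modes, which
 * have no host timer at all.
 */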
static int itimer_shutdown(struct clock_event_device *evt)
{
	int cpu = evt - &timer_clockevent[0];

	if (time_travel_mode != TT_MODE_OFF)
		time_travel_del_event(&time_travel_timer_event);

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		os_timer_disable(cpu);

	return 0;
}

static int itimer_set_periodic(struct clock_event_device *evt)
{
	unsigned long long interval = NSEC_PER_SEC / HZ;
	int cpu = evt - &timer_clockevent[0];

	if (time_travel_mode != TT_MODE_OFF) {
		time_travel_del_event(&time_travel_timer_event);
		time_travel_set_event_fn(&time_travel_timer_event,
					 time_travel_periodic_timer);
		time_travel_set_interval(interval);
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time + interval);
	}

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		os_timer_set_interval(cpu, interval);

	return 0;
}

static int itimer_next_event(unsigned long delta,
			     struct clock_event_device *evt)
{
	delta += 1;

	if (time_travel_mode != TT_MODE_OFF) {
		time_travel_del_event(&time_travel_timer_event);
		time_travel_set_event_fn(&time_travel_timer_event,
					 time_travel_oneshot_timer);
		time_travel_add_event(&time_travel_timer_event,
				      time_travel_time + delta);
	}

	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL)
		return os_timer_one_shot(raw_smp_processor_id(), delta);

	return 0;
}

static int itimer_one_shot(struct clock_event_device *evt)
{
	return itimer_next_event(0, evt);
}

static struct clock_event_device _timer_clockevent = {
	.name			= "posix-timer",
	.rating			= 250,
	.features		= CLOCK_EVT_FEAT_PERIODIC |
				  CLOCK_EVT_FEAT_ONESHOT,
	.set_state_shutdown	= itimer_shutdown,
	.set_state_periodic	= itimer_set_periodic,
	.set_state_oneshot	= itimer_one_shot,
	.set_next_event		= itimer_next_event,
	.shift			= 0,
	.max_delta_ns		= 0xffffffff,
	.max_delta_ticks	= 0xffffffff,
	.min_delta_ns		= TIMER_MIN_DELTA,
	.min_delta_ticks	= TIMER_MIN_DELTA, // microsecond resolution should be enough for anyone, same as 640K RAM
	.irq			= 0,
	.mult			= 1,
};

static irqreturn_t um_timer(int irq, void *dev)
{
	int cpu = raw_smp_processor_id();
	struct clock_event_device *evt = &timer_clockevent[cpu];

	/*
	 * Interrupt the (possibly) running userspace process; technically this
	 * should only happen if userspace is currently executing.
	 * With infinite CPU time-travel, we can only get here when userspace
	 * is not executing. Do not notify there and avoid spurious scheduling.
	 */
	if (time_travel_mode != TT_MODE_INFCPU &&
	    time_travel_mode != TT_MODE_EXTERNAL &&
	    get_current()->mm)
		os_alarm_process(get_current()->mm->context.id.pid);

	evt->event_handler(evt);

	return IRQ_HANDLED;
}
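
/*
 * The clocksource below counts in units of TIMER_MULTIPLIER nanoseconds:
 * timer_read() divides by TIMER_MULTIPLIER and um_timer_init() registers
 * the clocksource at NSEC_PER_SEC/TIMER_MULTIPLIER "Hz", so one counter
 * tick corresponds to TIMER_MULTIPLIER ns of (simulated or host) time.
 */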
static u64 timer_read(struct clocksource *cs)
{
	if (time_travel_mode != TT_MODE_OFF) {
		/*
		 * We make reading the timer cost a bit so that we don't get
		 * stuck in loops that expect time to move more than the
		 * exact requested sleep amount, e.g. python's socket server,
		 * see https://bugs.python.org/issue37026.
		 *
		 * However, don't do that when we're in interrupt or such as
		 * then we might recurse into our own processing, and get to
		 * even more waiting, and that's not good - it messes up the
		 * "what do I do next" and onstack event we use to know when
		 * to return from time_travel_update_time().
		 */
		if (!irqs_disabled() && !in_interrupt() && !in_softirq() &&
		    !time_travel_ext_waiting)
			time_travel_update_time_rel(TIMER_MULTIPLIER);
		return time_travel_time / TIMER_MULTIPLIER;
	}

	return os_nsecs() / TIMER_MULTIPLIER;
}

static struct clocksource timer_clocksource = {
	.name		= "timer",
	.rating		= 300,
	.read		= timer_read,
	.mask		= CLOCKSOURCE_MASK(64),
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
};

int um_setup_timer(void)
{
	int cpu = raw_smp_processor_id();
	struct clock_event_device *evt = &timer_clockevent[cpu];
	int err;

	err = os_timer_create();
	if (err)
		return err;

	memcpy(evt, &_timer_clockevent, sizeof(*evt));
	evt->cpumask = cpumask_of(cpu);
	clockevents_register_device(evt);

	return 0;
}

static void __init um_timer_init(void)
{
	int err;

	err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL);
	if (err != 0)
		printk(KERN_ERR "register_timer : request_irq failed - "
		       "errno = %d\n", -err);

	err = um_setup_timer();
	if (err) {
		printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
		return;
	}

	err = clocksource_register_hz(&timer_clocksource, NSEC_PER_SEC/TIMER_MULTIPLIER);
	if (err) {
		printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
		return;
	}
}

void read_persistent_clock64(struct timespec64 *ts)
{
	long long nsecs;

	time_travel_set_start();

	if (time_travel_mode != TT_MODE_OFF)
		nsecs = time_travel_start + time_travel_time;
	else
		nsecs = os_persistent_clock_emulation();

	set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
				  nsecs % NSEC_PER_SEC);
}

void __init time_init(void)
{
	timer_set_signal_handler();
	late_time_init = um_timer_init;
}

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
unsigned long calibrate_delay_is_known(void)
{
	if (time_travel_mode == TT_MODE_INFCPU ||
	    time_travel_mode == TT_MODE_EXTERNAL)
		return 1;
	return 0;
}

static int setup_time_travel(char *str)
{
	if (strcmp(str, "=inf-cpu") == 0) {
		time_travel_mode = TT_MODE_INFCPU;
		_timer_clockevent.name = "time-travel-timer-infcpu";
		timer_clocksource.name = "time-travel-clock";
		return 1;
	}

	if (strncmp(str, "=ext:", 5) == 0) {
		time_travel_mode = TT_MODE_EXTERNAL;
		_timer_clockevent.name = "time-travel-timer-external";
		timer_clocksource.name = "time-travel-clock-external";
		return time_travel_connect_external(str + 5);
	}

	if (!*str) {
		time_travel_mode = TT_MODE_BASIC;
		_timer_clockevent.name = "time-travel-timer";
		timer_clocksource.name = "time-travel-clock";
		return 1;
	}

	return -EINVAL;
}

__setup("time-travel", setup_time_travel);
__uml_help(setup_time_travel,
"time-travel\n"
" This option just enables basic time travel mode, in which the clock/timers\n"
" inside the UML instance skip forward when there's nothing to do, rather than\n"
" waiting for real time to elapse. However, instance CPU speed is limited by\n"
" the real CPU speed, so e.g. a 10ms timer will always fire after ~10ms wall\n"
" clock (but quicker when there's nothing to do).\n"
"\n"
"time-travel=inf-cpu\n"
" This enables time travel mode with infinite processing power, in which there\n"
" are no wall clock timers, and any CPU processing happens - as seen from the\n"
" guest - instantly. This can be useful for accurate simulation regardless of\n"
" debug overhead, physical CPU speed, etc. but is somewhat dangerous as it can\n"
" easily lead to getting stuck (e.g. if anything in the system busy loops).\n"
"\n"
"time-travel=ext:[ID:]/path/to/socket\n"
" This enables time travel mode similar to =inf-cpu, except the system will\n"
" use the given socket to coordinate with a central scheduler, in order to\n"
" have more than one system simultaneously be on simulated time. The virtio\n"
" driver code in UML knows about this so you can also simulate networks and\n"
" devices using it, assuming the device has the right capabilities.\n"
" The optional ID is a 64-bit integer that's sent to the central scheduler.\n\n");
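
/*
 * Note: the value is parsed with kstrtoull() using base 0, so both
 * decimal and 0x-prefixed hexadecimal nanosecond values are accepted.
 */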
static int setup_time_travel_start(char *str)
{
	int err;

	err = kstrtoull(str, 0, &time_travel_start);
	if (err)
		return err;

	time_travel_start_set = 1;
	return 1;
}

__setup("time-travel-start=", setup_time_travel_start);
__uml_help(setup_time_travel_start,
"time-travel-start=<nanoseconds>\n"
" Configure the UML instance's wall clock to start at this value rather than\n"
" the host's wall clock at the time of UML boot.\n\n");

static struct kobject *bc_time_kobject;

static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "0x%llx", bc_message);
}

static ssize_t bc_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count)
{
	int ret;
	u64 user_bc_message;

	ret = kstrtou64(buf, 0, &user_bc_message);
	if (ret)
		return ret;

	bc_message = user_bc_message;

	time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, bc_message);
	pr_info("um: time: sent broadcast message: 0x%llx\n", bc_message);
	return count;
}

static struct kobj_attribute bc_attribute = __ATTR(bc-message, 0660, bc_show, bc_store);

static int __init um_bc_start(void)
{
	if (time_travel_mode != TT_MODE_EXTERNAL)
		return 0;

	bc_time_kobject = kobject_create_and_add("um-ext-time", kernel_kobj);
	if (!bc_time_kobject)
		return 0;

	if (sysfs_create_file(bc_time_kobject, &bc_attribute.attr))
		pr_debug("failed to create the bc file in /sys/kernel/um-ext-time");

	return 0;
}
late_initcall(um_bc_start);
#endif