// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2017 - Cambridge Greys Ltd
 * Copyright (C) 2011 - 2014 Cisco Systems Inc
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
 *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
 */

#include <linux/cpumask.h>
#include <linux/hardirq.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <as-layout.h>
#include <kern_util.h>
#include <os.h>
#include <irq_user.h>
#include <irq_kern.h>
#include <linux/time-internal.h>


/* When epoll triggers we do not know why it did so;
 * we can also have different IRQs for read and write.
 * This is why we keep a small irq_reg array for each fd -
 * one entry per IRQ type.
 */
struct irq_reg {
	void *id;
	int irq;
	/* it's cheaper to store this than to query it */
	int events;
	bool active;
	bool pending;
	bool wakeup;
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
	bool pending_event;
	void (*timetravel_handler)(int, int, void *,
				   struct time_travel_event *);
	struct time_travel_event event;
#endif
};

struct irq_entry {
	struct list_head list;
	int fd;
	struct irq_reg reg[NUM_IRQ_TYPES];
	bool suspended;
	bool sigio_workaround;
};

static DEFINE_SPINLOCK(irq_lock);
static LIST_HEAD(active_fds);
static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ);
static bool irqs_suspended;
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
static bool irqs_pending;
#endif

static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
{
	/*
	 * irq->active guards against reentry;
	 * irq->pending accumulates pending requests.
	 * If pending is raised, the IRQ handler is re-run
	 * until pending is cleared.
	 */
	if (irq->active) {
		irq->active = false;

		do {
			irq->pending = false;
			do_IRQ(irq->irq, regs);
		} while (irq->pending);

		irq->active = true;
	} else {
		irq->pending = true;
	}
}
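
/*
 * Worked example (editor's illustration, not original kernel text): with
 * the protocol above, a SIGIO that arrives while do_IRQ() for the same
 * irq_reg is still running only raises ->pending; the outer
 * irq_io_loop() then performs one more pass instead of nesting:
 *
 *	outer:  active -> false, pending -> false, do_IRQ()
 *	nested: active is already false, so pending -> true, return
 *	outer:  pending was set, so pending -> false, do_IRQ() again
 *	outer:  pending still false, active -> true, done
 */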

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
static void irq_event_handler(struct time_travel_event *ev)
{
	struct irq_reg *reg = container_of(ev, struct irq_reg, event);

	/* do nothing if suspended; just cause a wakeup and mark as pending */
	if (irqs_suspended) {
		irqs_pending = true;
		reg->pending_event = true;
		return;
	}

	generic_handle_irq(reg->irq);
}

static bool irq_do_timetravel_handler(struct irq_entry *entry,
				      enum um_irq_type t)
{
	struct irq_reg *reg = &entry->reg[t];

	if (!reg->timetravel_handler)
		return false;

	/*
	 * Handle all messages - we might get multiple even while
	 * interrupts are already suspended, due to suspend order
	 * etc. Note that time_travel_add_irq_event() will not add
	 * an event twice; if it's pending already, "first wins".
	 */
	reg->timetravel_handler(reg->irq, entry->fd, reg->id, &reg->event);

	if (!reg->event.pending)
		return false;

	return true;
}

static void irq_do_pending_events(bool timetravel_handlers_only)
{
	struct irq_entry *entry;

	if (!irqs_pending || timetravel_handlers_only)
		return;

	irqs_pending = false;

	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type t;

		for (t = 0; t < NUM_IRQ_TYPES; t++) {
			struct irq_reg *reg = &entry->reg[t];

			/*
			 * Any timetravel_handler was invoked already, just
			 * directly run the IRQ.
			 */
			if (reg->pending_event) {
				irq_enter();
				generic_handle_irq(reg->irq);
				irq_exit();
				reg->pending_event = false;
			}
		}
	}
}
#else
static bool irq_do_timetravel_handler(struct irq_entry *entry,
				      enum um_irq_type t)
{
	return false;
}

static void irq_do_pending_events(bool timetravel_handlers_only)
{
}
#endif

static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t,
			      struct uml_pt_regs *regs,
			      bool timetravel_handlers_only)
{
	struct irq_reg *reg = &entry->reg[t];

	if (!reg->events)
		return;

	if (os_epoll_triggered(idx, reg->events) <= 0)
		return;

	if (irq_do_timetravel_handler(entry, t))
		return;

	/*
	 * If we're called to only run time-travel handlers then don't
	 * actually proceed but mark sigio as pending (if applicable).
	 * For suspend/resume, timetravel_handlers_only may be true
	 * despite time-travel not being configured and used.
	 */
	if (timetravel_handlers_only) {
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
		reg->pending_event = true;
		irqs_pending = true;
		mark_sigio_pending();
#endif
		return;
	}

	irq_io_loop(reg, regs);
}
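
/*
 * Example (editor's sketch, not part of the original file): shape of a
 * driver-side timetravel_handler as passed to um_request_irq_tt() below.
 * It runs from sigio_reg_handler() via irq_do_timetravel_handler() and
 * typically inspects the fd's message to decide when, in simulated time,
 * the interrupt should be delivered, then queues the per-irq_reg event.
 * The handler name below is invented, and the exact call shown for
 * time_travel_add_irq_event() is assumed from the comment above:
 *
 *	static void example_tt_handler(int irq, int fd, void *dev_id,
 *				       struct time_travel_event *ev)
 *	{
 *		// peek at / consume the message that raised SIGIO ...
 *		time_travel_add_irq_event(ev);
 *	}
 */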

static void _sigio_handler(struct uml_pt_regs *regs,
			   bool timetravel_handlers_only)
{
	struct irq_entry *irq_entry;
	int n, i;

	if (timetravel_handlers_only && !um_irq_timetravel_handler_used())
		return;

	/* Flush out pending events that were ignored due to time-travel. */
	if (!irqs_suspended)
		irq_do_pending_events(timetravel_handlers_only);

	while (1) {
		/* This is now lockless - epoll keeps back-references to the
		 * irqs which have triggered it, so there is no need to walk
		 * the irq list and lock it every time. We avoid locking by
		 * turning off IO for a specific fd by executing
		 * os_del_epoll_fd(fd) before we do any changes to the actual
		 * data structures.
		 */
		n = os_waiting_for_events_epoll();

		if (n <= 0) {
			if (n == -EINTR)
				continue;
			else
				break;
		}

		for (i = 0; i < n; i++) {
			enum um_irq_type t;

			irq_entry = os_epoll_get_data_pointer(i);

			for (t = 0; t < NUM_IRQ_TYPES; t++)
				sigio_reg_handler(i, irq_entry, t, regs,
						  timetravel_handlers_only);
		}
	}

	if (!timetravel_handlers_only)
		free_irqs();
}

void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
	_sigio_handler(regs, irqs_suspended);
}

static struct irq_entry *get_irq_entry_by_fd(int fd)
{
	struct irq_entry *walk;

	lockdep_assert_held(&irq_lock);

	list_for_each_entry(walk, &active_fds, list) {
		if (walk->fd == fd)
			return walk;
	}

	return NULL;
}

static void free_irq_entry(struct irq_entry *to_free, bool remove)
{
	if (!to_free)
		return;

	if (remove)
		os_del_epoll_fd(to_free->fd);
	list_del(&to_free->list);
	kfree(to_free);
}

static bool update_irq_entry(struct irq_entry *entry)
{
	enum um_irq_type i;
	int events = 0;

	for (i = 0; i < NUM_IRQ_TYPES; i++)
		events |= entry->reg[i].events;

	if (events) {
		/* will modify (instead of add) if needed */
		os_add_epoll_fd(events, entry->fd, entry);
		return true;
	}

	os_del_epoll_fd(entry->fd);
	return false;
}

static void update_or_free_irq_entry(struct irq_entry *entry)
{
	if (!update_irq_entry(entry))
		free_irq_entry(entry, false);
}

static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
		       void (*timetravel_handler)(int, int, void *,
						  struct time_travel_event *))
{
	struct irq_entry *irq_entry;
	int err, events = os_event_mask(type);
	unsigned long flags;

	err = os_set_fd_async(fd);
	if (err < 0)
		goto out;

	spin_lock_irqsave(&irq_lock, flags);
	irq_entry = get_irq_entry_by_fd(fd);
	if (irq_entry) {
		/* cannot register the same FD twice with the same type */
		if (WARN_ON(irq_entry->reg[type].events)) {
			err = -EALREADY;
			goto out_unlock;
		}

		/* temporarily disable to avoid IRQ-side locking */
		os_del_epoll_fd(fd);
	} else {
		irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
		if (!irq_entry) {
			err = -ENOMEM;
			goto out_unlock;
		}
		irq_entry->fd = fd;
		list_add_tail(&irq_entry->list, &active_fds);
		maybe_sigio_broken(fd);
	}

	irq_entry->reg[type].id = dev_id;
	irq_entry->reg[type].irq = irq;
	irq_entry->reg[type].active = true;
	irq_entry->reg[type].events = events;

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
	if (um_irq_timetravel_handler_used()) {
		irq_entry->reg[type].timetravel_handler = timetravel_handler;
		irq_entry->reg[type].event.fn = irq_event_handler;
	}
#endif

	WARN_ON(!update_irq_entry(irq_entry));
	spin_unlock_irqrestore(&irq_lock, flags);

	return 0;
out_unlock:
	spin_unlock_irqrestore(&irq_lock, flags);
out:
	return err;
}
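
/*
 * Example (editor's illustration): registering both a read and a write
 * interrupt on the same descriptor results in a single irq_entry on
 * active_fds whose reg[] array has two populated slots, and one epoll
 * registration whose event mask is the OR of the two. Drivers reach
 * activate_fd() via um_request_irq() below; IRQ_READ/IRQ_WRITE are
 * assumed here to be the enum um_irq_type values from irq_kern.h:
 *
 *	activate_fd(rd_irq, fd, IRQ_READ, dev, NULL);
 *	activate_fd(wr_irq, fd, IRQ_WRITE, dev, NULL);
 *	// -> one irq_entry for fd, reg[IRQ_READ] and reg[IRQ_WRITE] set,
 *	//    epoll data pointer == the irq_entry (see _sigio_handler())
 */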

/*
 * Remove the entry or entries for a specific FD; if you
 * don't want to remove all the possible entries then use
 * um_free_irq() or deactivate_fd() instead.
 */
void free_irq_by_fd(int fd)
{
	struct irq_entry *to_free;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	to_free = get_irq_entry_by_fd(fd);
	free_irq_entry(to_free, true);
	spin_unlock_irqrestore(&irq_lock, flags);
}
EXPORT_SYMBOL(free_irq_by_fd);

static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
{
	struct irq_entry *entry;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type i;

		for (i = 0; i < NUM_IRQ_TYPES; i++) {
			struct irq_reg *reg = &entry->reg[i];

			if (!reg->events)
				continue;
			if (reg->irq != irq)
				continue;
			if (reg->id != dev)
				continue;

			os_del_epoll_fd(entry->fd);
			reg->events = 0;
			update_or_free_irq_entry(entry);
			goto out;
		}
	}
out:
	spin_unlock_irqrestore(&irq_lock, flags);
}

void deactivate_fd(int fd, int irqnum)
{
	struct irq_entry *entry;
	unsigned long flags;
	enum um_irq_type i;

	os_del_epoll_fd(fd);

	spin_lock_irqsave(&irq_lock, flags);
	entry = get_irq_entry_by_fd(fd);
	if (!entry)
		goto out;

	for (i = 0; i < NUM_IRQ_TYPES; i++) {
		if (!entry->reg[i].events)
			continue;
		if (entry->reg[i].irq == irqnum)
			entry->reg[i].events = 0;
	}

	update_or_free_irq_entry(entry);
out:
	spin_unlock_irqrestore(&irq_lock, flags);

	ignore_sigio_fd(fd);
}
EXPORT_SYMBOL(deactivate_fd);

/*
 * Called just before shutdown in order to provide a clean exec
 * environment in case the system is rebooting. No locking because
 * that would cause a pointless shutdown hang if something hadn't
 * released the lock.
 */
int deactivate_all_fds(void)
{
	struct irq_entry *entry;

	/* Stop IO. The IRQ loop has no lock so this is our
	 * only way of making sure we are safe to dispose
	 * of all IRQ handlers.
	 */
	os_set_ioignore();

	/* we can no longer call kfree() here so just deactivate */
	list_for_each_entry(entry, &active_fds, list)
		os_del_epoll_fd(entry->fd);
	os_close_epoll_fd();
	return 0;
}
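
/*
 * Example (editor's note): how the teardown helpers above differ for a
 * descriptor that was registered via um_request_irq() (defined below):
 *
 *	um_free_irq(irq, dev);   // undo um_request_irq(): drop the matching
 *	                         // irq/dev_id registration and the genirq action
 *	deactivate_fd(fd, irq);  // clear that irq's registrations on the fd
 *	                         // and tell the SIGIO layer to ignore the fd
 *	free_irq_by_fd(fd);      // drop every registration on the fd
 */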

/*
 * do_IRQ handles all normal device IRQs (the special
 * SMP cross-CPU interrupts have their own specific
 * handlers).
 */
unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs);
	irq_enter();
	generic_handle_irq(irq);
	irq_exit();
	set_irq_regs(old_regs);
	return 1;
}

void um_free_irq(int irq, void *dev)
{
	if (WARN(irq < 0 || irq > UM_LAST_SIGNAL_IRQ,
		 "freeing invalid irq %d", irq))
		return;

	free_irq_by_irq_and_dev(irq, dev);
	free_irq(irq, dev);
	clear_bit(irq, irqs_allocated);
}
EXPORT_SYMBOL(um_free_irq);

static int
_um_request_irq(int irq, int fd, enum um_irq_type type,
		irq_handler_t handler, unsigned long irqflags,
		const char *devname, void *dev_id,
		void (*timetravel_handler)(int, int, void *,
					   struct time_travel_event *))
{
	int err;

	if (irq == UM_IRQ_ALLOC) {
		int i;

		for (i = UM_FIRST_DYN_IRQ; i < NR_IRQS; i++) {
			if (!test_and_set_bit(i, irqs_allocated)) {
				irq = i;
				break;
			}
		}
	}

	if (irq < 0)
		return -ENOSPC;

	if (fd != -1) {
		err = activate_fd(irq, fd, type, dev_id, timetravel_handler);
		if (err)
			goto error;
	}

	err = request_irq(irq, handler, irqflags, devname, dev_id);
	if (err < 0)
		goto error;

	return irq;
error:
	clear_bit(irq, irqs_allocated);
	return err;
}

int um_request_irq(int irq, int fd, enum um_irq_type type,
		   irq_handler_t handler, unsigned long irqflags,
		   const char *devname, void *dev_id)
{
	return _um_request_irq(irq, fd, type, handler, irqflags,
			       devname, dev_id, NULL);
}
EXPORT_SYMBOL(um_request_irq);

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
int um_request_irq_tt(int irq, int fd, enum um_irq_type type,
		      irq_handler_t handler, unsigned long irqflags,
		      const char *devname, void *dev_id,
		      void (*timetravel_handler)(int, int, void *,
						 struct time_travel_event *))
{
	return _um_request_irq(irq, fd, type, handler, irqflags,
			       devname, dev_id, timetravel_handler);
}
EXPORT_SYMBOL(um_request_irq_tt);

void sigio_run_timetravel_handlers(void)
{
	_sigio_handler(NULL, true);
}
#endif
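
/*
 * Example (editor's sketch): typical use of the API above by a UML
 * driver that owns a host file descriptor. The handler and the fd/dev
 * names are invented; UM_IRQ_ALLOC is used as above, and IRQ_READ is
 * assumed to come from enum um_irq_type in irq_kern.h:
 *
 *	static irqreturn_t example_intr(int irq, void *dev_id)
 *	{
 *		// read/drain the host fd here
 *		return IRQ_HANDLED;
 *	}
 *
 *	irq = um_request_irq(UM_IRQ_ALLOC, fd, IRQ_READ, example_intr,
 *			     0, "example", dev);
 *	if (irq < 0)
 *		return irq;
 *	...
 *	um_free_irq(irq, dev);
 */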

#ifdef CONFIG_PM_SLEEP
void um_irqs_suspend(void)
{
	struct irq_entry *entry;
	unsigned long flags;

	irqs_suspended = true;

	spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type t;
		bool clear = true;

		for (t = 0; t < NUM_IRQ_TYPES; t++) {
			if (!entry->reg[t].events)
				continue;

			/*
			 * For the SIGIO_WRITE_IRQ, which is used to handle the
			 * SIGIO workaround thread, we need special handling:
			 * enable wake for it itself, but below we tell it about
			 * any FDs that should be suspended.
			 */
			if (entry->reg[t].wakeup ||
			    entry->reg[t].irq == SIGIO_WRITE_IRQ
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
			    || entry->reg[t].timetravel_handler
#endif
			    ) {
				clear = false;
				break;
			}
		}

		if (clear) {
			entry->suspended = true;
			os_clear_fd_async(entry->fd);
			entry->sigio_workaround =
				!__ignore_sigio_fd(entry->fd);
		}
	}
	spin_unlock_irqrestore(&irq_lock, flags);
}

void um_irqs_resume(void)
{
	struct irq_entry *entry;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		if (entry->suspended) {
			int err = os_set_fd_async(entry->fd);

			WARN(err < 0, "os_set_fd_async returned %d\n", err);
			entry->suspended = false;

			if (entry->sigio_workaround) {
				err = __add_sigio_fd(entry->fd);
				WARN(err < 0, "__add_sigio_fd returned %d\n", err);
			}
		}
	}
	spin_unlock_irqrestore(&irq_lock, flags);

	irqs_suspended = false;
	send_sigio_to_self();
}

static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
{
	struct irq_entry *entry;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type t;

		for (t = 0; t < NUM_IRQ_TYPES; t++) {
			if (!entry->reg[t].events)
				continue;

			if (entry->reg[t].irq != d->irq)
				continue;
			entry->reg[t].wakeup = on;
			goto unlock;
		}
	}
unlock:
	spin_unlock_irqrestore(&irq_lock, flags);
	return 0;
}
#else
#define normal_irq_set_wake NULL
#endif
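
/*
 * Example (editor's note): normal_irq_set_wake() above is reached through
 * the generic wake machinery, so a driver that wants its fd to keep
 * delivering SIGIO across um_irqs_suspend() marks the irq as a wakeup
 * source with the standard helpers from <linux/interrupt.h>, e.g.:
 *
 *	enable_irq_wake(irq);	// sets reg->wakeup via .irq_set_wake
 *	...
 *	disable_irq_wake(irq);
 */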

/*
 * irq_chip must define at least enable/disable and ack when
 * the edge handler is used.
 */
static void dummy(struct irq_data *d)
{
}

/* This is used for everything other than the timer. */
static struct irq_chip normal_irq_type = {
	.name = "SIGIO",
	.irq_disable = dummy,
	.irq_enable = dummy,
	.irq_ack = dummy,
	.irq_mask = dummy,
	.irq_unmask = dummy,
	.irq_set_wake = normal_irq_set_wake,
};

static struct irq_chip alarm_irq_type = {
	.name = "SIGALRM",
	.irq_disable = dummy,
	.irq_enable = dummy,
	.irq_ack = dummy,
	.irq_mask = dummy,
	.irq_unmask = dummy,
};

void __init init_IRQ(void)
{
	int i;

	irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);

	for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++)
		irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
	/* Initialize EPOLL Loop */
	os_setup_epoll();
}

/*
 * IRQ stack entry and exit:
 *
 * Unlike i386, UML doesn't receive IRQs on the normal kernel stack
 * and then switch over to the IRQ stack after some preparation; we
 * use sigaltstack to receive signals on a separate stack from the
 * start. These two functions make sure the rest of the kernel won't
 * be too upset by being on a different stack. The IRQ stack has a
 * thread_info structure at the bottom so that current et al continue
 * to work.
 *
 * to_irq_stack copies the current task's thread_info to the IRQ stack
 * thread_info and sets the task's stack to point to the IRQ stack.
 *
 * from_irq_stack copies the thread_info struct back (flags may have
 * been modified) and resets the task's stack pointer.
 *
 * Tricky bits -
 *
 * What happens when two signals race each other? UML doesn't block
 * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal
 * could arrive while a previous one is still setting up the
 * thread_info.
 *
 * There are three cases -
 *	The first interrupt on the stack - sets up the thread_info and
 *	  handles the interrupt
 *	A nested interrupt interrupting the copying of the thread_info -
 *	  can't handle the interrupt, as the stack is in an unknown state
 *	A nested interrupt not interrupting the copying of the
 *	  thread_info - doesn't do any setup, just handles the interrupt
 *
 * The first job is to figure out whether we interrupted stack setup.
 * This is done by xchging the signal mask with pending_mask.
 * If the value that comes back is zero, then there is no setup in
 * progress, and the interrupt can be handled. If the value is
 * non-zero, then there is stack setup in progress. In order to have
 * the interrupt handled, we leave our signal in the mask, and it will
 * be handled by the upper handler after it has set up the stack.
 *
 * Next is to figure out whether we are the outer handler or a nested
 * one. As part of setting up the stack, thread_info->real_thread is
 * set to non-NULL (and is reset to NULL on exit). This is the
 * nesting indicator. If it is non-NULL, then the stack is already
 * set up and the handler can run.
 */

static unsigned long pending_mask;

unsigned long to_irq_stack(unsigned long *mask_out)
{
	struct thread_info *ti;
	unsigned long mask, old;
	int nested;

	mask = xchg(&pending_mask, *mask_out);
	if (mask != 0) {
		/*
		 * If any interrupts come in at this point, we want to
		 * make sure that their bits aren't lost by our
		 * putting our bit in. So, this loop accumulates bits
		 * until xchg returns the same value that we put in.
		 * When that happens, there were no new interrupts,
		 * and pending_mask contains a bit for each interrupt
		 * that came in.
		 */
		old = *mask_out;
		do {
			old |= mask;
			mask = xchg(&pending_mask, old);
		} while (mask != old);
		return 1;
	}

	ti = current_thread_info();
	nested = (ti->real_thread != NULL);
	if (!nested) {
		struct task_struct *task;
		struct thread_info *tti;

		task = cpu_tasks[ti->cpu].task;
		tti = task_thread_info(task);

		*ti = *tti;
		ti->real_thread = tti;
		task->stack = ti;
	}

	mask = xchg(&pending_mask, 0);
	*mask_out |= mask | nested;
	return 0;
}

unsigned long from_irq_stack(int nested)
{
	struct thread_info *ti, *to;
	unsigned long mask;

	ti = current_thread_info();

	pending_mask = 1;

	to = ti->real_thread;
	current->stack = to;
	ti->real_thread = NULL;
	*to = *ti;

	mask = xchg(&pending_mask, 0);
	return mask & ~1;
}
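
/*
 * Worked example (editor's illustration) of the pending_mask handling in
 * to_irq_stack() above: suppose the outer handler for signal bit A has
 * just xchg'd pending_mask from 0 to A (mask == 0, so it proceeds to set
 * up the stack). A nested signal B now runs to_irq_stack(), xchg's
 * pending_mask from A to B, sees a non-zero value, and enters the
 * accumulation loop, which leaves pending_mask == A | B and returns 1,
 * so the nested handler does nothing further. When the outer handler
 * later does xchg(&pending_mask, 0) it reads back A | B and ORs it into
 * *mask_out, so B ends up being handled on the already set-up IRQ stack.
 */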