// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2017 - Cambridge Greys Ltd
 * Copyright (C) 2011 - 2014 Cisco Systems Inc
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
 *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
 */

#include <linux/cpumask.h>
#include <linux/hardirq.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <as-layout.h>
#include <kern_util.h>
#include <os.h>
#include <irq_user.h>
#include <irq_kern.h>
#include <linux/time-internal.h>


/* When epoll triggers we do not know why it did so;
 * we can also have different IRQs for read and write.
 * This is why we keep a small irq_reg array for each fd -
 * one entry per IRQ type.
 */
struct irq_reg {
	void *id;
	int irq;
	/* it's cheaper to store this than to query it */
	int events;
	bool active;
	bool pending;
	bool wakeup;
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
	bool pending_event;
	void (*timetravel_handler)(int, int, void *,
				   struct time_travel_event *);
	struct time_travel_event event;
#endif
};

struct irq_entry {
	struct list_head list;
	int fd;
	struct irq_reg reg[NUM_IRQ_TYPES];
	bool suspended;
	bool sigio_workaround;
};

static DEFINE_SPINLOCK(irq_lock);
static LIST_HEAD(active_fds);
static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ);
static bool irqs_suspended;
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
static bool irqs_pending;
#endif

static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
{
	/*
	 * irq->active guards against reentry
	 * irq->pending accumulates pending requests
	 * if pending is raised the irq_handler is re-run
	 * until pending is cleared
	 */
	if (irq->active) {
		irq->active = false;

		do {
			irq->pending = false;
			do_IRQ(irq->irq, regs);
		} while (irq->pending);

		irq->active = true;
	} else {
		irq->pending = true;
	}
}

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
static void irq_event_handler(struct time_travel_event *ev)
{
	struct irq_reg *reg = container_of(ev, struct irq_reg, event);

	/* do nothing if suspended; just cause a wakeup and mark as pending */
	if (irqs_suspended) {
		irqs_pending = true;
		reg->pending_event = true;
		return;
	}

	generic_handle_irq(reg->irq);
}
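/*
 * Time-travel note (summary of the flow implemented here): instead of
 * injecting the interrupt directly from the SIGIO path, the
 * per-registration timetravel_handler below inspects the fd and
 * typically arms reg->event; when that event fires, irq_event_handler()
 * above injects the interrupt via generic_handle_irq(), or merely
 * records it as pending while interrupts are suspended.
 */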
static bool irq_do_timetravel_handler(struct irq_entry *entry,
				      enum um_irq_type t)
{
	struct irq_reg *reg = &entry->reg[t];

	if (!reg->timetravel_handler)
		return false;

	/*
	 * Handle all messages - we might get multiple even while
	 * interrupts are already suspended, due to suspend order
	 * etc. Note that time_travel_add_irq_event() will not add
	 * an event twice, if it's pending already "first wins".
	 */
	reg->timetravel_handler(reg->irq, entry->fd, reg->id, &reg->event);

	if (!reg->event.pending)
		return false;

	return true;
}

static void irq_do_pending_events(bool timetravel_handlers_only)
{
	struct irq_entry *entry;

	if (!irqs_pending || timetravel_handlers_only)
		return;

	irqs_pending = false;

	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type t;

		for (t = 0; t < NUM_IRQ_TYPES; t++) {
			struct irq_reg *reg = &entry->reg[t];

			/*
			 * Any timetravel_handler was invoked already, just
			 * directly run the IRQ.
			 */
			if (reg->pending_event) {
				irq_enter();
				generic_handle_irq(reg->irq);
				irq_exit();
				reg->pending_event = false;
			}
		}
	}
}
#else
static bool irq_do_timetravel_handler(struct irq_entry *entry,
				      enum um_irq_type t)
{
	return false;
}

static void irq_do_pending_events(bool timetravel_handlers_only)
{
}
#endif

static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t,
			      struct uml_pt_regs *regs,
			      bool timetravel_handlers_only)
{
	struct irq_reg *reg = &entry->reg[t];

	if (!reg->events)
		return;

	if (os_epoll_triggered(idx, reg->events) <= 0)
		return;

	if (irq_do_timetravel_handler(entry, t))
		return;

	/*
	 * If we're called to only run time-travel handlers then don't
	 * actually proceed but mark sigio as pending (if applicable).
	 * For suspend/resume, timetravel_handlers_only may be true
	 * despite time-travel not being configured and used.
	 */
	if (timetravel_handlers_only) {
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
		reg->pending_event = true;
		irqs_pending = true;
		mark_sigio_pending();
#endif
		return;
	}

	irq_io_loop(reg, regs);
}
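/*
 * Main SIGIO dispatch: drain the epoll instance and run the handlers
 * registered for every fd/IRQ-type pair that triggered.  When
 * timetravel_handlers_only is set, only the out-of-band time-travel
 * handlers run and everything else is recorded as pending.
 */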
static void _sigio_handler(struct uml_pt_regs *regs,
			   bool timetravel_handlers_only)
{
	struct irq_entry *irq_entry;
	int n, i;

	if (timetravel_handlers_only && !um_irq_timetravel_handler_used())
		return;

	/* Flush out pending events that were ignored due to time-travel. */
	if (!irqs_suspended)
		irq_do_pending_events(timetravel_handlers_only);

	while (1) {
		/* This is now lockless - epoll keeps back-references to the irqs
		 * which have triggered it so there is no need to walk the irq
		 * list and lock it every time. We avoid locking by turning off
		 * IO for a specific fd by executing os_del_epoll_fd(fd) before
		 * we do any changes to the actual data structures.
		 */
		n = os_waiting_for_events_epoll();

		if (n <= 0) {
			if (n == -EINTR)
				continue;
			else
				break;
		}

		for (i = 0; i < n; i++) {
			enum um_irq_type t;

			irq_entry = os_epoll_get_data_pointer(i);

			for (t = 0; t < NUM_IRQ_TYPES; t++)
				sigio_reg_handler(i, irq_entry, t, regs,
						  timetravel_handlers_only);
		}
	}

	if (!timetravel_handlers_only)
		free_irqs();
}

void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
	preempt_disable();
	_sigio_handler(regs, irqs_suspended);
	preempt_enable();
}

static struct irq_entry *get_irq_entry_by_fd(int fd)
{
	struct irq_entry *walk;

	lockdep_assert_held(&irq_lock);

	list_for_each_entry(walk, &active_fds, list) {
		if (walk->fd == fd)
			return walk;
	}

	return NULL;
}

static void free_irq_entry(struct irq_entry *to_free, bool remove)
{
	if (!to_free)
		return;

	if (remove)
		os_del_epoll_fd(to_free->fd);
	list_del(&to_free->list);
	kfree(to_free);
}

static bool update_irq_entry(struct irq_entry *entry)
{
	enum um_irq_type i;
	int events = 0;

	for (i = 0; i < NUM_IRQ_TYPES; i++)
		events |= entry->reg[i].events;

	if (events) {
		/* will modify (instead of add) if needed */
		os_add_epoll_fd(events, entry->fd, entry);
		return true;
	}

	os_del_epoll_fd(entry->fd);
	return false;
}

static void update_or_free_irq_entry(struct irq_entry *entry)
{
	if (!update_irq_entry(entry))
		free_irq_entry(entry, false);
}
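/*
 * Register (or extend) the bookkeeping for an fd/IRQ-type pair: switch
 * the fd to async (SIGIO) mode, look up or allocate its irq_entry under
 * irq_lock, fill in the per-type irq_reg slot, and (re-)add the fd to
 * the epoll set with the combined event mask.  Called from
 * _um_request_irq() below whenever an fd is supplied.
 */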
static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
		       void (*timetravel_handler)(int, int, void *,
						  struct time_travel_event *))
{
	struct irq_entry *irq_entry;
	int err, events = os_event_mask(type);
	unsigned long flags;

	err = os_set_fd_async(fd);
	if (err < 0)
		goto out;

	spin_lock_irqsave(&irq_lock, flags);
	irq_entry = get_irq_entry_by_fd(fd);
	if (irq_entry) {
		/* cannot register the same FD twice with the same type */
		if (WARN_ON(irq_entry->reg[type].events)) {
			err = -EALREADY;
			goto out_unlock;
		}

		/* temporarily disable to avoid IRQ-side locking */
		os_del_epoll_fd(fd);
	} else {
		irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
		if (!irq_entry) {
			err = -ENOMEM;
			goto out_unlock;
		}
		irq_entry->fd = fd;
		list_add_tail(&irq_entry->list, &active_fds);
		maybe_sigio_broken(fd);
	}

	irq_entry->reg[type].id = dev_id;
	irq_entry->reg[type].irq = irq;
	irq_entry->reg[type].active = true;
	irq_entry->reg[type].events = events;

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
	if (um_irq_timetravel_handler_used()) {
		irq_entry->reg[type].timetravel_handler = timetravel_handler;
		irq_entry->reg[type].event.fn = irq_event_handler;
	}
#endif

	WARN_ON(!update_irq_entry(irq_entry));
	spin_unlock_irqrestore(&irq_lock, flags);

	return 0;
out_unlock:
	spin_unlock_irqrestore(&irq_lock, flags);
out:
	return err;
}

/*
 * Remove the entry or entries for a specific FD; if you
 * don't want to remove all the possible entries then use
 * um_free_irq() or deactivate_fd() instead.
 */
void free_irq_by_fd(int fd)
{
	struct irq_entry *to_free;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	to_free = get_irq_entry_by_fd(fd);
	free_irq_entry(to_free, true);
	spin_unlock_irqrestore(&irq_lock, flags);
}
EXPORT_SYMBOL(free_irq_by_fd);

static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
{
	struct irq_entry *entry;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type i;

		for (i = 0; i < NUM_IRQ_TYPES; i++) {
			struct irq_reg *reg = &entry->reg[i];

			if (!reg->events)
				continue;
			if (reg->irq != irq)
				continue;
			if (reg->id != dev)
				continue;

			os_del_epoll_fd(entry->fd);
			reg->events = 0;
			update_or_free_irq_entry(entry);
			goto out;
		}
	}
out:
	spin_unlock_irqrestore(&irq_lock, flags);
}

void deactivate_fd(int fd, int irqnum)
{
	struct irq_entry *entry;
	unsigned long flags;
	enum um_irq_type i;

	os_del_epoll_fd(fd);

	spin_lock_irqsave(&irq_lock, flags);
	entry = get_irq_entry_by_fd(fd);
	if (!entry)
		goto out;

	for (i = 0; i < NUM_IRQ_TYPES; i++) {
		if (!entry->reg[i].events)
			continue;
		if (entry->reg[i].irq == irqnum)
			entry->reg[i].events = 0;
	}

	update_or_free_irq_entry(entry);
out:
	spin_unlock_irqrestore(&irq_lock, flags);

	ignore_sigio_fd(fd);
}
EXPORT_SYMBOL(deactivate_fd);

/*
 * Called just before shutdown in order to provide a clean exec
 * environment in case the system is rebooting.  No locking because
 * that would cause a pointless shutdown hang if something hadn't
 * released the lock.
 */
int deactivate_all_fds(void)
{
	struct irq_entry *entry;

	/* Stop IO. The IRQ loop has no lock so this is our
	 * only way of making sure we are safe to dispose
	 * of all IRQ handlers
	 */
	os_set_ioignore();

	/* we can no longer call kfree() here so just deactivate */
	list_for_each_entry(entry, &active_fds, list)
		os_del_epoll_fd(entry->fd);
	os_close_epoll_fd();
	return 0;
}

/*
 * do_IRQ handles all normal device IRQs (the special
 * SMP cross-CPU interrupts have their own specific
 * handlers).
 */
unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs);
	irq_enter();
	generic_handle_irq(irq);
	irq_exit();
	set_irq_regs(old_regs);
	return 1;
}

void um_free_irq(int irq, void *dev)
{
	if (WARN(irq < 0 || irq > UM_LAST_SIGNAL_IRQ,
		 "freeing invalid irq %d", irq))
		return;

	free_irq_by_irq_and_dev(irq, dev);
	free_irq(irq, dev);
	clear_bit(irq, irqs_allocated);
}
EXPORT_SYMBOL(um_free_irq);
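/*
 * IRQ allocation/registration helpers.  A typical caller passes
 * UM_IRQ_ALLOC to have an IRQ number picked from the dynamic range,
 * roughly like this (illustrative only - my_handler/my_dev are
 * placeholders and IRQ_READ is one of the enum um_irq_type values):
 *
 *	irq = um_request_irq(UM_IRQ_ALLOC, fd, IRQ_READ, my_handler,
 *			     0, "my-device", my_dev);
 *	if (irq < 0)
 *		return irq;
 *	...
 *	um_free_irq(irq, my_dev);
 */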
static int
_um_request_irq(int irq, int fd, enum um_irq_type type,
		irq_handler_t handler, unsigned long irqflags,
		const char *devname, void *dev_id,
		void (*timetravel_handler)(int, int, void *,
					   struct time_travel_event *))
{
	int err;

	if (irq == UM_IRQ_ALLOC) {
		int i;

		for (i = UM_FIRST_DYN_IRQ; i < NR_IRQS; i++) {
			if (!test_and_set_bit(i, irqs_allocated)) {
				irq = i;
				break;
			}
		}
	}

	if (irq < 0)
		return -ENOSPC;

	if (fd != -1) {
		err = activate_fd(irq, fd, type, dev_id, timetravel_handler);
		if (err)
			goto error;
	}

	err = request_irq(irq, handler, irqflags, devname, dev_id);
	if (err < 0)
		goto error;

	return irq;
error:
	clear_bit(irq, irqs_allocated);
	return err;
}

int um_request_irq(int irq, int fd, enum um_irq_type type,
		   irq_handler_t handler, unsigned long irqflags,
		   const char *devname, void *dev_id)
{
	return _um_request_irq(irq, fd, type, handler, irqflags,
			       devname, dev_id, NULL);
}
EXPORT_SYMBOL(um_request_irq);

#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
int um_request_irq_tt(int irq, int fd, enum um_irq_type type,
		      irq_handler_t handler, unsigned long irqflags,
		      const char *devname, void *dev_id,
		      void (*timetravel_handler)(int, int, void *,
						 struct time_travel_event *))
{
	return _um_request_irq(irq, fd, type, handler, irqflags,
			       devname, dev_id, timetravel_handler);
}
EXPORT_SYMBOL(um_request_irq_tt);

void sigio_run_timetravel_handlers(void)
{
	_sigio_handler(NULL, true);
}
#endif
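/*
 * Suspend/resume: fds that are neither wake sources nor owned by the
 * SIGIO workaround thread (nor, with time-travel, handled out of band)
 * have async IO turned off and are ignored by SIGIO while suspended;
 * on resume they are re-armed and a SIGIO is sent to ourselves so that
 * anything that fired in between is picked up.
 */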
#ifdef CONFIG_PM_SLEEP
void um_irqs_suspend(void)
{
	struct irq_entry *entry;
	unsigned long flags;

	irqs_suspended = true;

	spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type t;
		bool clear = true;

		for (t = 0; t < NUM_IRQ_TYPES; t++) {
			if (!entry->reg[t].events)
				continue;

			/*
			 * For the SIGIO_WRITE_IRQ, which is used to handle the
			 * SIGIO workaround thread, we need special handling:
			 * enable wake for it itself, but below we tell it about
			 * any FDs that should be suspended.
			 */
			if (entry->reg[t].wakeup ||
			    entry->reg[t].irq == SIGIO_WRITE_IRQ
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
			    || entry->reg[t].timetravel_handler
#endif
			    ) {
				clear = false;
				break;
			}
		}

		if (clear) {
			entry->suspended = true;
			os_clear_fd_async(entry->fd);
			entry->sigio_workaround =
				!__ignore_sigio_fd(entry->fd);
		}
	}
	spin_unlock_irqrestore(&irq_lock, flags);
}

void um_irqs_resume(void)
{
	struct irq_entry *entry;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		if (entry->suspended) {
			int err = os_set_fd_async(entry->fd);

			WARN(err < 0, "os_set_fd_async returned %d\n", err);
			entry->suspended = false;

			if (entry->sigio_workaround) {
				err = __add_sigio_fd(entry->fd);
				WARN(err < 0, "add_sigio_returned %d\n", err);
			}
		}
	}
	spin_unlock_irqrestore(&irq_lock, flags);

	irqs_suspended = false;
	send_sigio_to_self();
}

static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
{
	struct irq_entry *entry;
	unsigned long flags;

	spin_lock_irqsave(&irq_lock, flags);
	list_for_each_entry(entry, &active_fds, list) {
		enum um_irq_type t;

		for (t = 0; t < NUM_IRQ_TYPES; t++) {
			if (!entry->reg[t].events)
				continue;

			if (entry->reg[t].irq != d->irq)
				continue;
			entry->reg[t].wakeup = on;
			goto unlock;
		}
	}
unlock:
	spin_unlock_irqrestore(&irq_lock, flags);
	return 0;
}
#else
#define normal_irq_set_wake NULL
#endif

/*
 * irq_chip must define at least enable/disable and ack when
 * the edge handler is used.
 */
static void dummy(struct irq_data *d)
{
}

/* This is used for everything other than the timer. */
static struct irq_chip normal_irq_type = {
	.name = "SIGIO",
	.irq_disable = dummy,
	.irq_enable = dummy,
	.irq_ack = dummy,
	.irq_mask = dummy,
	.irq_unmask = dummy,
	.irq_set_wake = normal_irq_set_wake,
};

static struct irq_chip alarm_irq_type = {
	.name = "SIGALRM",
	.irq_disable = dummy,
	.irq_enable = dummy,
	.irq_ack = dummy,
	.irq_mask = dummy,
	.irq_unmask = dummy,
};

void __init init_IRQ(void)
{
	int i;

	irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);

	for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++)
		irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
	/* Initialize EPOLL Loop */
	os_setup_epoll();
}

/*
 * IRQ stack entry and exit:
 *
 * Unlike i386, UML doesn't receive IRQs on the normal kernel stack
 * and switch over to the IRQ stack after some preparation.  We use
 * sigaltstack to receive signals on a separate stack from the start.
 * These two functions make sure the rest of the kernel won't be too
 * upset by being on a different stack.  The IRQ stack has a
 * thread_info structure at the bottom so that current et al continue
 * to work.
 *
 * to_irq_stack copies the current task's thread_info to the IRQ stack
 * thread_info and sets the task's stack to point to the IRQ stack.
 *
 * from_irq_stack copies the thread_info struct back (flags may have
 * been modified) and resets the task's stack pointer.
 *
 * Tricky bits -
 *
 * What happens when two signals race each other?  UML doesn't block
 * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal
 * could arrive while a previous one is still setting up the
 * thread_info.
 *
 * There are three cases -
 *     The first interrupt on the stack - sets up the thread_info and
 *         handles the interrupt
 *     A nested interrupt interrupting the copying of the thread_info -
 *         can't handle the interrupt, as the stack is in an unknown state
 *     A nested interrupt not interrupting the copying of the
 *         thread_info - doesn't do any setup, just handles the interrupt
 *
 * The first job is to figure out whether we interrupted stack setup.
 * This is done by xchging the signal mask with pending_mask.  If the
 * value that comes back is zero, then there is no setup in progress,
 * and the interrupt can be handled.  If the value is non-zero, then
 * there is stack setup in progress.  In order to have the interrupt
 * handled, we leave our signal in the mask, and it will be handled by
 * the upper handler after it has set up the stack.
 *
 * Next is to figure out whether we are the outer handler or a nested
 * one.  As part of setting up the stack, thread_info->real_thread is
 * set to non-NULL (and is reset to NULL on exit).  This is the
 * nesting indicator.  If it is non-NULL, then the stack is already
 * set up and the handler can run.
 */
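/*
 * Non-zero while an IRQ stack switch is in progress: to_irq_stack()
 * parks the incoming signal mask here while it copies the thread_info,
 * and from_irq_stack() sets bit 0 while copying it back.  Signals that
 * arrive in either window OR their bits in and are handled once the
 * copy is done.
 */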
static unsigned long pending_mask;

unsigned long to_irq_stack(unsigned long *mask_out)
{
	struct thread_info *ti;
	unsigned long mask, old;
	int nested;

	mask = xchg(&pending_mask, *mask_out);
	if (mask != 0) {
		/*
		 * If any interrupts come in at this point, we want to
		 * make sure that their bits aren't lost by our
		 * putting our bit in.  So, this loop accumulates bits
		 * until xchg returns the same value that we put in.
		 * When that happens, there were no new interrupts,
		 * and pending_mask contains a bit for each interrupt
		 * that came in.
		 */
		old = *mask_out;
		do {
			old |= mask;
			mask = xchg(&pending_mask, old);
		} while (mask != old);
		return 1;
	}

	ti = current_thread_info();
	nested = (ti->real_thread != NULL);
	if (!nested) {
		struct task_struct *task;
		struct thread_info *tti;

		task = cpu_tasks[ti->cpu].task;
		tti = task_thread_info(task);

		*ti = *tti;
		ti->real_thread = tti;
		task->stack = ti;
	}

	mask = xchg(&pending_mask, 0);
	*mask_out |= mask | nested;
	return 0;
}

unsigned long from_irq_stack(int nested)
{
	struct thread_info *ti, *to;
	unsigned long mask;

	ti = current_thread_info();

	pending_mask = 1;

	to = ti->real_thread;
	current->stack = to;
	ti->real_thread = NULL;
	*to = *ti;

	mask = xchg(&pending_mask, 0);
	return mask & ~1;
}