xref: /linux/arch/um/kernel/irq.c (revision 6fdcba32711044c35c0e1b094cbd8f3f0b4472c9)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2017 - Cambridge Greys Ltd
4  * Copyright (C) 2011 - 2014 Cisco Systems Inc
5  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
6  * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
7  *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
8  */
9 
10 #include <linux/cpumask.h>
11 #include <linux/hardirq.h>
12 #include <linux/interrupt.h>
13 #include <linux/kernel_stat.h>
14 #include <linux/module.h>
15 #include <linux/sched.h>
16 #include <linux/seq_file.h>
17 #include <linux/slab.h>
18 #include <as-layout.h>
19 #include <kern_util.h>
20 #include <os.h>
21 #include <irq_user.h>
22 
23 
/* Defined outside this file; invoked at the very end of sigio_handler(). */
extern void free_irqs(void);
25 
/* When epoll triggers we do not know why it did so, and we can also
 * have different IRQs for read and write on the same fd.
 * This is why we keep a small irq_fd array for each fd -
 * one entry per IRQ type.
 */

struct irq_entry {
	/* Next entry in the singly linked active_fds list, or NULL */
	struct irq_entry *next;
	/* File descriptor this entry describes */
	int fd;
	/* Per-IRQ-type registrations, indexed by type; NULL means unused */
	struct irq_fd *irq_array[MAX_IRQ_TYPE + 1];
};
37 
/* Head of the list of per-fd entries; NULL when no fd is registered */
static struct irq_entry *active_fds;

/*
 * Taken by the registration and removal paths in this file around
 * accesses to active_fds. sigio_handler() and deactivate_all_fds()
 * do not take it.
 */
static DEFINE_SPINLOCK(irq_lock);
41 
42 static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs)
43 {
44 /*
45  * irq->active guards against reentry
46  * irq->pending accumulates pending requests
47  * if pending is raised the irq_handler is re-run
48  * until pending is cleared
49  */
50 	if (irq->active) {
51 		irq->active = false;
52 		do {
53 			irq->pending = false;
54 			do_IRQ(irq->irq, regs);
55 		} while (irq->pending && (!irq->purge));
56 		if (!irq->purge)
57 			irq->active = true;
58 	} else {
59 		irq->pending = true;
60 	}
61 }
62 
63 void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
64 {
65 	struct irq_entry *irq_entry;
66 	struct irq_fd *irq;
67 
68 	int n, i, j;
69 
70 	while (1) {
71 		/* This is now lockless - epoll keeps back-referencesto the irqs
72 		 * which have trigger it so there is no need to walk the irq
73 		 * list and lock it every time. We avoid locking by turning off
74 		 * IO for a specific fd by executing os_del_epoll_fd(fd) before
75 		 * we do any changes to the actual data structures
76 		 */
77 		n = os_waiting_for_events_epoll();
78 
79 		if (n <= 0) {
80 			if (n == -EINTR)
81 				continue;
82 			else
83 				break;
84 		}
85 
86 		for (i = 0; i < n ; i++) {
87 			/* Epoll back reference is the entry with 3 irq_fd
88 			 * leaves - one for each irq type.
89 			 */
90 			irq_entry = (struct irq_entry *)
91 				os_epoll_get_data_pointer(i);
92 			for (j = 0; j < MAX_IRQ_TYPE ; j++) {
93 				irq = irq_entry->irq_array[j];
94 				if (irq == NULL)
95 					continue;
96 				if (os_epoll_triggered(i, irq->events) > 0)
97 					irq_io_loop(irq, regs);
98 				if (irq->purge) {
99 					irq_entry->irq_array[j] = NULL;
100 					kfree(irq);
101 				}
102 			}
103 		}
104 	}
105 
106 	free_irqs();
107 }
108 
109 static int assign_epoll_events_to_irq(struct irq_entry *irq_entry)
110 {
111 	int i;
112 	int events = 0;
113 	struct irq_fd *irq;
114 
115 	for (i = 0; i < MAX_IRQ_TYPE ; i++) {
116 		irq = irq_entry->irq_array[i];
117 		if (irq != NULL)
118 			events = irq->events | events;
119 	}
120 	if (events > 0) {
121 	/* os_add_epoll will call os_mod_epoll if this already exists */
122 		return os_add_epoll_fd(events, irq_entry->fd, irq_entry);
123 	}
124 	/* No events - delete */
125 	return os_del_epoll_fd(irq_entry->fd);
126 }
127 
128 
129 
130 static int activate_fd(int irq, int fd, int type, void *dev_id)
131 {
132 	struct irq_fd *new_fd;
133 	struct irq_entry *irq_entry;
134 	int i, err, events;
135 	unsigned long flags;
136 
137 	err = os_set_fd_async(fd);
138 	if (err < 0)
139 		goto out;
140 
141 	spin_lock_irqsave(&irq_lock, flags);
142 
143 	/* Check if we have an entry for this fd */
144 
145 	err = -EBUSY;
146 	for (irq_entry = active_fds;
147 		irq_entry != NULL; irq_entry = irq_entry->next) {
148 		if (irq_entry->fd == fd)
149 			break;
150 	}
151 
152 	if (irq_entry == NULL) {
153 		/* This needs to be atomic as it may be called from an
154 		 * IRQ context.
155 		 */
156 		irq_entry = kmalloc(sizeof(struct irq_entry), GFP_ATOMIC);
157 		if (irq_entry == NULL) {
158 			printk(KERN_ERR
159 				"Failed to allocate new IRQ entry\n");
160 			goto out_unlock;
161 		}
162 		irq_entry->fd = fd;
163 		for (i = 0; i < MAX_IRQ_TYPE; i++)
164 			irq_entry->irq_array[i] = NULL;
165 		irq_entry->next = active_fds;
166 		active_fds = irq_entry;
167 	}
168 
169 	/* Check if we are trying to re-register an interrupt for a
170 	 * particular fd
171 	 */
172 
173 	if (irq_entry->irq_array[type] != NULL) {
174 		printk(KERN_ERR
175 			"Trying to reregister IRQ %d FD %d TYPE %d ID %p\n",
176 			irq, fd, type, dev_id
177 		);
178 		goto out_unlock;
179 	} else {
180 		/* New entry for this fd */
181 
182 		err = -ENOMEM;
183 		new_fd = kmalloc(sizeof(struct irq_fd), GFP_ATOMIC);
184 		if (new_fd == NULL)
185 			goto out_unlock;
186 
187 		events = os_event_mask(type);
188 
189 		*new_fd = ((struct irq_fd) {
190 			.id		= dev_id,
191 			.irq		= irq,
192 			.type		= type,
193 			.events		= events,
194 			.active		= true,
195 			.pending	= false,
196 			.purge		= false
197 		});
198 		/* Turn off any IO on this fd - allows us to
199 		 * avoid locking the IRQ loop
200 		 */
201 		os_del_epoll_fd(irq_entry->fd);
202 		irq_entry->irq_array[type] = new_fd;
203 	}
204 
205 	/* Turn back IO on with the correct (new) IO event mask */
206 	assign_epoll_events_to_irq(irq_entry);
207 	spin_unlock_irqrestore(&irq_lock, flags);
208 	maybe_sigio_broken(fd, (type != IRQ_NONE));
209 
210 	return 0;
211 out_unlock:
212 	spin_unlock_irqrestore(&irq_lock, flags);
213 out:
214 	return err;
215 }
216 
217 /*
218  * Walk the IRQ list and dispose of any unused entries.
219  * Should be done under irq_lock.
220  */
221 
222 static void garbage_collect_irq_entries(void)
223 {
224 	int i;
225 	bool reap;
226 	struct irq_entry *walk;
227 	struct irq_entry *previous = NULL;
228 	struct irq_entry *to_free;
229 
230 	if (active_fds == NULL)
231 		return;
232 	walk = active_fds;
233 	while (walk != NULL) {
234 		reap = true;
235 		for (i = 0; i < MAX_IRQ_TYPE ; i++) {
236 			if (walk->irq_array[i] != NULL) {
237 				reap = false;
238 				break;
239 			}
240 		}
241 		if (reap) {
242 			if (previous == NULL)
243 				active_fds = walk->next;
244 			else
245 				previous->next = walk->next;
246 			to_free = walk;
247 		} else {
248 			to_free = NULL;
249 		}
250 		walk = walk->next;
251 		kfree(to_free);
252 	}
253 }
254 
255 /*
256  * Walk the IRQ list and get the descriptor for our FD
257  */
258 
259 static struct irq_entry *get_irq_entry_by_fd(int fd)
260 {
261 	struct irq_entry *walk = active_fds;
262 
263 	while (walk != NULL) {
264 		if (walk->fd == fd)
265 			return walk;
266 		walk = walk->next;
267 	}
268 	return NULL;
269 }
270 
271 
272 /*
273  * Walk the IRQ list and dispose of an entry for a specific
274  * device, fd and number. Note - if sharing an IRQ for read
 * and write for the same FD it will be disposed in either case.
276  * If this behaviour is undesirable use different IRQ ids.
277  */
278 
/* Flag bits for do_free_by_irq_and_dev(): skip the respective match test */
#define IGNORE_IRQ 1		/* match any irq number */
#define IGNORE_DEV (1<<1)	/* match any device id */
281 
282 static void do_free_by_irq_and_dev(
283 	struct irq_entry *irq_entry,
284 	unsigned int irq,
285 	void *dev,
286 	int flags
287 )
288 {
289 	int i;
290 	struct irq_fd *to_free;
291 
292 	for (i = 0; i < MAX_IRQ_TYPE ; i++) {
293 		if (irq_entry->irq_array[i] != NULL) {
294 			if (
295 			((flags & IGNORE_IRQ) ||
296 				(irq_entry->irq_array[i]->irq == irq)) &&
297 			((flags & IGNORE_DEV) ||
298 				(irq_entry->irq_array[i]->id == dev))
299 			) {
300 				/* Turn off any IO on this fd - allows us to
301 				 * avoid locking the IRQ loop
302 				 */
303 				os_del_epoll_fd(irq_entry->fd);
304 				to_free = irq_entry->irq_array[i];
305 				irq_entry->irq_array[i] = NULL;
306 				assign_epoll_events_to_irq(irq_entry);
307 				if (to_free->active)
308 					to_free->purge = true;
309 				else
310 					kfree(to_free);
311 			}
312 		}
313 	}
314 }
315 
316 void free_irq_by_fd(int fd)
317 {
318 	struct irq_entry *to_free;
319 	unsigned long flags;
320 
321 	spin_lock_irqsave(&irq_lock, flags);
322 	to_free = get_irq_entry_by_fd(fd);
323 	if (to_free != NULL) {
324 		do_free_by_irq_and_dev(
325 			to_free,
326 			-1,
327 			NULL,
328 			IGNORE_IRQ | IGNORE_DEV
329 		);
330 	}
331 	garbage_collect_irq_entries();
332 	spin_unlock_irqrestore(&irq_lock, flags);
333 }
334 EXPORT_SYMBOL(free_irq_by_fd);
335 
336 static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
337 {
338 	struct irq_entry *to_free;
339 	unsigned long flags;
340 
341 	spin_lock_irqsave(&irq_lock, flags);
342 	to_free = active_fds;
343 	while (to_free != NULL) {
344 		do_free_by_irq_and_dev(
345 			to_free,
346 			irq,
347 			dev,
348 			0
349 		);
350 		to_free = to_free->next;
351 	}
352 	garbage_collect_irq_entries();
353 	spin_unlock_irqrestore(&irq_lock, flags);
354 }
355 
356 
357 void deactivate_fd(int fd, int irqnum)
358 {
359 	struct irq_entry *to_free;
360 	unsigned long flags;
361 
362 	os_del_epoll_fd(fd);
363 	spin_lock_irqsave(&irq_lock, flags);
364 	to_free = get_irq_entry_by_fd(fd);
365 	if (to_free != NULL) {
366 		do_free_by_irq_and_dev(
367 			to_free,
368 			irqnum,
369 			NULL,
370 			IGNORE_DEV
371 		);
372 	}
373 	garbage_collect_irq_entries();
374 	spin_unlock_irqrestore(&irq_lock, flags);
375 	ignore_sigio_fd(fd);
376 }
377 EXPORT_SYMBOL(deactivate_fd);
378 
379 /*
380  * Called just before shutdown in order to provide a clean exec
381  * environment in case the system is rebooting.  No locking because
382  * that would cause a pointless shutdown hang if something hadn't
383  * released the lock.
384  */
385 int deactivate_all_fds(void)
386 {
387 	struct irq_entry *to_free;
388 
389 	/* Stop IO. The IRQ loop has no lock so this is our
390 	 * only way of making sure we are safe to dispose
391 	 * of all IRQ handlers
392 	 */
393 	os_set_ioignore();
394 	to_free = active_fds;
395 	while (to_free != NULL) {
396 		do_free_by_irq_and_dev(
397 			to_free,
398 			-1,
399 			NULL,
400 			IGNORE_IRQ | IGNORE_DEV
401 		);
402 		to_free = to_free->next;
403 	}
404 	/* don't garbage collect - we can no longer call kfree() here */
405 	os_close_epoll_fd();
406 	return 0;
407 }
408 
/*
 * do_IRQ handles all normal device IRQs (the special
 * SMP cross-CPU interrupts have their own specific
 * handlers).
 *
 * Installs @regs as the current IRQ register set for the duration of
 * the generic handler and restores the previous set afterwards.
 * Always returns 1.
 */
unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
{
	struct pt_regs *saved_regs;

	saved_regs = set_irq_regs((struct pt_regs *)regs);

	irq_enter();
	generic_handle_irq(irq);
	irq_exit();

	set_irq_regs(saved_regs);

	return 1;
}
423 
/*
 * Release an interrupt obtained with um_request_irq(): first drop the
 * fd registrations matching (@irq, @dev) kept in this file, then hand
 * the irq back through free_irq().
 */
void um_free_irq(unsigned int irq, void *dev)
{
	free_irq_by_irq_and_dev(irq, dev);
	free_irq(irq, dev);
}
EXPORT_SYMBOL(um_free_irq);
430 
431 int um_request_irq(unsigned int irq, int fd, int type,
432 		   irq_handler_t handler,
433 		   unsigned long irqflags, const char * devname,
434 		   void *dev_id)
435 {
436 	int err;
437 
438 	if (fd != -1) {
439 		err = activate_fd(irq, fd, type, dev_id);
440 		if (err)
441 			return err;
442 	}
443 
444 	return request_irq(irq, handler, irqflags, devname, dev_id);
445 }
446 
447 EXPORT_SYMBOL(um_request_irq);
448 
/*
 * irq_chip must define at least enable/disable and ack when
 * the edge handler is used.
 *
 * This no-op is used for every callback of the irq_chip instances
 * defined in this file.
 */
static void dummy(struct irq_data *d)
{
}
456 
/*
 * This is used for everything else than the timer.
 * All callbacks are the no-op dummy().
 */
static struct irq_chip normal_irq_type = {
	.name = "SIGIO",
	.irq_disable = dummy,
	.irq_enable = dummy,
	.irq_ack = dummy,
	.irq_mask = dummy,
	.irq_unmask = dummy,
};
466 
/*
 * Chip installed for TIMER_IRQ by init_IRQ().
 * All callbacks are the no-op dummy().
 */
static struct irq_chip SIGVTALRM_irq_type = {
	.name = "SIGVTALRM",
	.irq_disable = dummy,
	.irq_enable = dummy,
	.irq_ack = dummy,
	.irq_mask = dummy,
	.irq_unmask = dummy,
};
475 
476 void __init init_IRQ(void)
477 {
478 	int i;
479 
480 	irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
481 
482 
483 	for (i = 1; i <= LAST_IRQ; i++)
484 		irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
485 	/* Initialize EPOLL Loop */
486 	os_setup_epoll();
487 }
488 
489 /*
490  * IRQ stack entry and exit:
491  *
492  * Unlike i386, UML doesn't receive IRQs on the normal kernel stack
493  * and switch over to the IRQ stack after some preparation.  We use
494  * sigaltstack to receive signals on a separate stack from the start.
495  * These two functions make sure the rest of the kernel won't be too
496  * upset by being on a different stack.  The IRQ stack has a
497  * thread_info structure at the bottom so that current et al continue
498  * to work.
499  *
500  * to_irq_stack copies the current task's thread_info to the IRQ stack
501  * thread_info and sets the tasks's stack to point to the IRQ stack.
502  *
503  * from_irq_stack copies the thread_info struct back (flags may have
504  * been modified) and resets the task's stack pointer.
505  *
506  * Tricky bits -
507  *
508  * What happens when two signals race each other?  UML doesn't block
509  * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal
510  * could arrive while a previous one is still setting up the
511  * thread_info.
512  *
513  * There are three cases -
514  *     The first interrupt on the stack - sets up the thread_info and
515  * handles the interrupt
516  *     A nested interrupt interrupting the copying of the thread_info -
517  * can't handle the interrupt, as the stack is in an unknown state
518  *     A nested interrupt not interrupting the copying of the
519  * thread_info - doesn't do any setup, just handles the interrupt
520  *
521  * The first job is to figure out whether we interrupted stack setup.
 * This is done by xchging the signal mask with pending_mask.
523  * If the value that comes back is zero, then there is no setup in
524  * progress, and the interrupt can be handled.  If the value is
525  * non-zero, then there is stack setup in progress.  In order to have
526  * the interrupt handled, we leave our signal in the mask, and it will
527  * be handled by the upper handler after it has set up the stack.
528  *
529  * Next is to figure out whether we are the outer handler or a nested
530  * one.  As part of setting up the stack, thread_info->real_thread is
531  * set to non-NULL (and is reset to NULL on exit).  This is the
532  * nesting indicator.  If it is non-NULL, then the stack is already
533  * set up and the handler can run.
534  */
535 
/*
 * Signal bits accumulated by to_irq_stack() callers that found a stack
 * setup or teardown in progress. Non-zero while such a section runs;
 * drained with xchg() by to_irq_stack() and from_irq_stack().
 */
static unsigned long pending_mask;
537 
/*
 * Enter the IRQ stack for the signal(s) in *mask_out.
 *
 * Returns 1 when another handler is in the middle of stack setup or
 * teardown (pending_mask was already non-zero): our bits are merged
 * into pending_mask and the caller must not handle the interrupt.
 *
 * Returns 0 when the caller may proceed. In that case *mask_out has
 * been OR-ed with any bits that accumulated in pending_mask meanwhile,
 * and bit 0 is additionally set if this is a nested entry
 * (ti->real_thread was already non-NULL).
 */
unsigned long to_irq_stack(unsigned long *mask_out)
{
	struct thread_info *ti;
	unsigned long mask, old;
	int nested;

	/* Publish our bits; a non-zero previous value means setup is busy */
	mask = xchg(&pending_mask, *mask_out);
	if (mask != 0) {
		/*
		 * If any interrupts come in at this point, we want to
		 * make sure that their bits aren't lost by our
		 * putting our bit in.  So, this loop accumulates bits
		 * until xchg returns the same value that we put in.
		 * When that happens, there were no new interrupts,
		 * and pending_mask contains a bit for each interrupt
		 * that came in.
		 */
		old = *mask_out;
		do {
			old |= mask;
			mask = xchg(&pending_mask, old);
		} while (mask != old);
		return 1;
	}

	ti = current_thread_info();
	nested = (ti->real_thread != NULL);
	if (!nested) {
		struct task_struct *task;
		struct thread_info *tti;

		task = cpu_tasks[ti->cpu].task;
		tti = task_thread_info(task);

		/*
		 * First handler on this stack: copy the task's thread_info
		 * here, remember the original in real_thread, and point
		 * the task's stack at this copy.
		 */
		*ti = *tti;
		ti->real_thread = tti;
		task->stack = ti;
	}

	/* Setup finished: clear pending_mask and pick up what arrived */
	mask = xchg(&pending_mask, 0);
	*mask_out |= mask | nested;
	return 0;
}
581 
/*
 * Leave the IRQ stack: copy the thread_info back to the location saved
 * in real_thread, restore current->stack and clear real_thread.
 *
 * Returns the signal bits that accumulated in pending_mask while the
 * copy-back was in progress, with bit 0 cleared.
 * The nested argument is not used in this function's body.
 */
unsigned long from_irq_stack(int nested)
{
	struct thread_info *ti, *to;
	unsigned long mask;

	ti = current_thread_info();

	/*
	 * Make pending_mask non-zero so that a to_irq_stack() call racing
	 * with the copy below takes its "setup in progress" path and only
	 * records its bits.
	 */
	pending_mask = 1;

	to = ti->real_thread;
	current->stack = to;
	ti->real_thread = NULL;
	*to = *ti;

	/* Collect what arrived meanwhile; bit 0 is our own marker */
	mask = xchg(&pending_mask, 0);
	return mask & ~1;
}
599 
600