xref: /linux/kernel/softirq.c (revision ed3174d93c342b8b2eeba6bbd124707d55304a7b)
1 /*
2  *	linux/kernel/softirq.c
3  *
4  *	Copyright (C) 1992 Linus Torvalds
5  *
6  *	Distribute under GPLv2.
7  *
8  *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9  */
10 
11 #include <linux/module.h>
12 #include <linux/kernel_stat.h>
13 #include <linux/interrupt.h>
14 #include <linux/init.h>
15 #include <linux/mm.h>
16 #include <linux/notifier.h>
17 #include <linux/percpu.h>
18 #include <linux/cpu.h>
19 #include <linux/freezer.h>
20 #include <linux/kthread.h>
21 #include <linux/rcupdate.h>
22 #include <linux/smp.h>
23 #include <linux/tick.h>
24 
25 #include <asm/irq.h>
26 /*
27    - No shared variables, all the data are CPU local.
28    - If a softirq needs serialization, let it serialize itself
29      by its own spinlocks.
30    - Even if a softirq is serialized, only the local cpu is marked
31      for execution, so we get a sort of weak cpu binding.  It is
32      still not clear whether this results in better locality
33      or not.
34 
35    Examples:
36    - NET RX softirq. It is multithreaded and does not require
37      any global serialization.
38    - NET TX softirq. It kicks software netdevice queues, hence
39      it is logically serialized per device, but this serialization
40      is invisible to common code.
41    - Tasklets: serialized with respect to themselves (see the usage sketch below).
42  */
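
/*
 * As a rough illustration of the model above: a driver normally defers
 * work from its hard interrupt handler to a tasklet rather than to a
 * raw softirq.  The sketch below is illustrative only; the foo_* names
 * are hypothetical and the block is not built.
 */
#if 0
static void foo_do_deferred(unsigned long data)
{
	/* Runs in softirq context; never runs concurrently with itself. */
}

static DECLARE_TASKLET(foo_tasklet, foo_do_deferred, 0);

static irqreturn_t foo_interrupt(int irq, void *dev_id)
{
	/* Acknowledge the hardware here, then defer the heavy lifting. */
	tasklet_schedule(&foo_tasklet);
	return IRQ_HANDLED;
}
#endif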
43 
44 #ifndef __ARCH_IRQ_STAT
45 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
46 EXPORT_SYMBOL(irq_stat);
47 #endif
48 
49 static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
50 
51 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
52 
53 /*
54  * We must not loop here indefinitely, or we would starve userspace,
55  * but we also don't want to introduce a worst-case 1/HZ latency
56  * for the pending events, so we let the scheduler balance
57  * the softirq load for us.
58  */
59 static inline void wakeup_softirqd(void)
60 {
61 	/* Interrupts are disabled: no need to stop preemption */
62 	struct task_struct *tsk = __get_cpu_var(ksoftirqd);
63 
64 	if (tsk && tsk->state != TASK_RUNNING)
65 		wake_up_process(tsk);
66 }
67 
68 /*
69  * This one is for softirq.c-internal use,
70  * where hardirqs are disabled legitimately:
71  */
72 #ifdef CONFIG_TRACE_IRQFLAGS
73 static void __local_bh_disable(unsigned long ip)
74 {
75 	unsigned long flags;
76 
77 	WARN_ON_ONCE(in_irq());
78 
79 	raw_local_irq_save(flags);
80 	add_preempt_count(SOFTIRQ_OFFSET);
81 	/*
82 	 * Were softirqs turned off above:
83 	 */
84 	if (softirq_count() == SOFTIRQ_OFFSET)
85 		trace_softirqs_off(ip);
86 	raw_local_irq_restore(flags);
87 }
88 #else /* !CONFIG_TRACE_IRQFLAGS */
89 static inline void __local_bh_disable(unsigned long ip)
90 {
91 	add_preempt_count(SOFTIRQ_OFFSET);
92 	barrier();
93 }
94 #endif /* CONFIG_TRACE_IRQFLAGS */
95 
96 void local_bh_disable(void)
97 {
98 	__local_bh_disable((unsigned long)__builtin_return_address(0));
99 }
100 
101 EXPORT_SYMBOL(local_bh_disable);
102 
103 void __local_bh_enable(void)
104 {
105 	WARN_ON_ONCE(in_irq());
106 
107 	/*
108 	 * softirqs should never be enabled by __local_bh_enable();
109 	 * it always nests inside local_bh_enable() sections:
110 	 */
111 	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
112 
113 	sub_preempt_count(SOFTIRQ_OFFSET);
114 }
115 EXPORT_SYMBOL_GPL(__local_bh_enable);
116 
117 /*
118  * Special case: softirqs can safely be enabled by
119  * cond_resched_softirq() or by __do_softirq()
120  * without processing still-pending softirqs:
121  */
122 void _local_bh_enable(void)
123 {
124 	WARN_ON_ONCE(in_irq());
125 	WARN_ON_ONCE(!irqs_disabled());
126 
127 	if (softirq_count() == SOFTIRQ_OFFSET)
128 		trace_softirqs_on((unsigned long)__builtin_return_address(0));
129 	sub_preempt_count(SOFTIRQ_OFFSET);
130 }
131 
132 EXPORT_SYMBOL(_local_bh_enable);
133 
134 void local_bh_enable(void)
135 {
136 #ifdef CONFIG_TRACE_IRQFLAGS
137 	unsigned long flags;
138 
139 	WARN_ON_ONCE(in_irq());
140 #endif
141 	WARN_ON_ONCE(irqs_disabled());
142 
143 #ifdef CONFIG_TRACE_IRQFLAGS
144 	local_irq_save(flags);
145 #endif
146 	/*
147 	 * Are softirqs going to be turned on now:
148 	 */
149 	if (softirq_count() == SOFTIRQ_OFFSET)
150 		trace_softirqs_on((unsigned long)__builtin_return_address(0));
151 	/*
152 	 * Keep preemption disabled until we are done with
153 	 * softirq processing:
154  	 */
155  	sub_preempt_count(SOFTIRQ_OFFSET - 1);
156 
157 	if (unlikely(!in_interrupt() && local_softirq_pending()))
158 		do_softirq();
159 
160 	dec_preempt_count();
161 #ifdef CONFIG_TRACE_IRQFLAGS
162 	local_irq_restore(flags);
163 #endif
164 	preempt_check_resched();
165 }
166 EXPORT_SYMBOL(local_bh_enable);
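
/*
 * A typical use of local_bh_disable()/local_bh_enable() above: process
 * context briefly fences out softirqs (and hence tasklets and timers)
 * on the local CPU while it touches per-cpu data that a softirq handler
 * also touches.  Sketch only; the foo_* names are hypothetical and the
 * block is not built.
 */
#if 0
static DEFINE_PER_CPU(unsigned long, foo_events);	/* also bumped from a tasklet */

static void foo_count_event(void)
{
	local_bh_disable();		/* no softirq can run on this CPU now */
	__get_cpu_var(foo_events)++;
	local_bh_enable();		/* may run softirqs that became pending */
}
#endif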
167 
168 void local_bh_enable_ip(unsigned long ip)
169 {
170 #ifdef CONFIG_TRACE_IRQFLAGS
171 	unsigned long flags;
172 
173 	WARN_ON_ONCE(in_irq());
174 
175 	local_irq_save(flags);
176 #endif
177 	/*
178 	 * Are softirqs going to be turned on now:
179 	 */
180 	if (softirq_count() == SOFTIRQ_OFFSET)
181 		trace_softirqs_on(ip);
182 	/*
183 	 * Keep preemption disabled until we are done with
184 	 * softirq processing:
185  	 */
186  	sub_preempt_count(SOFTIRQ_OFFSET - 1);
187 
188 	if (unlikely(!in_interrupt() && local_softirq_pending()))
189 		do_softirq();
190 
191 	dec_preempt_count();
192 #ifdef CONFIG_TRACE_IRQFLAGS
193 	local_irq_restore(flags);
194 #endif
195 	preempt_check_resched();
196 }
197 EXPORT_SYMBOL(local_bh_enable_ip);
198 
199 /*
200  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
201  * and we fall back to ksoftirqd after that.
202  *
203  * This number has been established via experimentation.
204  * The two things to balance are latency and fairness -
205  * we want to handle softirqs as soon as possible, but they
206  * should not be able to lock up the box.
207  */
208 #define MAX_SOFTIRQ_RESTART 10
209 
210 asmlinkage void __do_softirq(void)
211 {
212 	struct softirq_action *h;
213 	__u32 pending;
214 	int max_restart = MAX_SOFTIRQ_RESTART;
215 	int cpu;
216 
217 	pending = local_softirq_pending();
218 	account_system_vtime(current);
219 
220 	__local_bh_disable((unsigned long)__builtin_return_address(0));
221 	trace_softirq_enter();
222 
223 	cpu = smp_processor_id();
224 restart:
225 	/* Reset the pending bitmask before enabling irqs */
226 	set_softirq_pending(0);
227 
228 	local_irq_enable();
229 
230 	h = softirq_vec;
231 
232 	do {
233 		if (pending & 1) {
234 			h->action(h);
235 			rcu_bh_qsctr_inc(cpu);
236 		}
237 		h++;
238 		pending >>= 1;
239 	} while (pending);
240 
241 	local_irq_disable();
242 
243 	pending = local_softirq_pending();
244 	if (pending && --max_restart)
245 		goto restart;
246 
247 	if (pending)
248 		wakeup_softirqd();
249 
250 	trace_softirq_exit();
251 
252 	account_system_vtime(current);
253 	_local_bh_enable();
254 }
255 
256 #ifndef __ARCH_HAS_DO_SOFTIRQ
257 
258 asmlinkage void do_softirq(void)
259 {
260 	__u32 pending;
261 	unsigned long flags;
262 
263 	if (in_interrupt())
264 		return;
265 
266 	local_irq_save(flags);
267 
268 	pending = local_softirq_pending();
269 
270 	if (pending)
271 		__do_softirq();
272 
273 	local_irq_restore(flags);
274 }
275 
276 #endif
277 
278 /*
279  * Enter an interrupt context.
280  */
281 void irq_enter(void)
282 {
283 #ifdef CONFIG_NO_HZ
284 	int cpu = smp_processor_id();
285 	if (idle_cpu(cpu) && !in_interrupt())
286 		tick_nohz_stop_idle(cpu);
287 #endif
288 	__irq_enter();
289 #ifdef CONFIG_NO_HZ
290 	if (idle_cpu(cpu))
291 		tick_nohz_update_jiffies();
292 #endif
293 }
294 
295 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
296 # define invoke_softirq()	__do_softirq()
297 #else
298 # define invoke_softirq()	do_softirq()
299 #endif
300 
301 /*
302  * Exit an interrupt context. Process softirqs if needed and possible:
303  */
304 void irq_exit(void)
305 {
306 	account_system_vtime(current);
307 	trace_hardirq_exit();
308 	sub_preempt_count(IRQ_EXIT_OFFSET);
309 	if (!in_interrupt() && local_softirq_pending())
310 		invoke_softirq();
311 
312 #ifdef CONFIG_NO_HZ
313 	/* Make sure that timer wheel updates are propagated */
314 	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
315 		tick_nohz_stop_sched_tick();
316 #endif
317 	preempt_enable_no_resched();
318 }
319 
320 /*
321  * This function must run with irqs disabled!
322  */
323 inline void raise_softirq_irqoff(unsigned int nr)
324 {
325 	__raise_softirq_irqoff(nr);
326 
327 	/*
328 	 * If we're in an interrupt or softirq, we're done
329 	 * (this also catches softirq-disabled code). We will
330 	 * actually run the softirq once we return from
331 	 * the irq or softirq.
332 	 *
333 	 * Otherwise we wake up ksoftirqd to make sure we
334 	 * schedule the softirq soon.
335 	 */
336 	if (!in_interrupt())
337 		wakeup_softirqd();
338 }
339 
340 void raise_softirq(unsigned int nr)
341 {
342 	unsigned long flags;
343 
344 	local_irq_save(flags);
345 	raise_softirq_irqoff(nr);
346 	local_irq_restore(flags);
347 }
348 
349 void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
350 {
351 	softirq_vec[nr].data = data;
352 	softirq_vec[nr].action = action;
353 }
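
/*
 * How a built-in subsystem would typically use open_softirq() and
 * raise_softirq() above.  Sketch only: FOO_SOFTIRQ and the foo_* names
 * are hypothetical (real vectors are the fixed entries in the softirq
 * enum, such as the tasklet vectors opened in softirq_init() below),
 * and the block is not built.
 */
#if 0
static void foo_softirq_action(struct softirq_action *a)
{
	/* Deferred work runs here, with hardirqs enabled. */
}

static void __init foo_subsys_init(void)
{
	open_softirq(FOO_SOFTIRQ, foo_softirq_action, NULL);
}

static void foo_kick(void)
{
	/* Marks the vector pending; wakes ksoftirqd when not in interrupt. */
	raise_softirq(FOO_SOFTIRQ);
}
#endif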
354 
355 /* Tasklets */
356 struct tasklet_head
357 {
358 	struct tasklet_struct *list;
359 };
360 
361 /* Some compilers disobey section attribute on statics when not
362    initialized -- RR */
363 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
364 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
365 
366 void __tasklet_schedule(struct tasklet_struct *t)
367 {
368 	unsigned long flags;
369 
370 	local_irq_save(flags);
371 	t->next = __get_cpu_var(tasklet_vec).list;
372 	__get_cpu_var(tasklet_vec).list = t;
373 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
374 	local_irq_restore(flags);
375 }
376 
377 EXPORT_SYMBOL(__tasklet_schedule);
378 
379 void __tasklet_hi_schedule(struct tasklet_struct *t)
380 {
381 	unsigned long flags;
382 
383 	local_irq_save(flags);
384 	t->next = __get_cpu_var(tasklet_hi_vec).list;
385 	__get_cpu_var(tasklet_hi_vec).list = t;
386 	raise_softirq_irqoff(HI_SOFTIRQ);
387 	local_irq_restore(flags);
388 }
389 
390 EXPORT_SYMBOL(__tasklet_hi_schedule);
391 
392 static void tasklet_action(struct softirq_action *a)
393 {
394 	struct tasklet_struct *list;
395 
396 	local_irq_disable();
397 	list = __get_cpu_var(tasklet_vec).list;
398 	__get_cpu_var(tasklet_vec).list = NULL;
399 	local_irq_enable();
400 
401 	while (list) {
402 		struct tasklet_struct *t = list;
403 
404 		list = list->next;
405 
406 		if (tasklet_trylock(t)) {
407 			if (!atomic_read(&t->count)) {
408 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
409 					BUG();
410 				t->func(t->data);
411 				tasklet_unlock(t);
412 				continue;
413 			}
414 			tasklet_unlock(t);
415 		}
416 
417 		local_irq_disable();
418 		t->next = __get_cpu_var(tasklet_vec).list;
419 		__get_cpu_var(tasklet_vec).list = t;
420 		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
421 		local_irq_enable();
422 	}
423 }
424 
425 static void tasklet_hi_action(struct softirq_action *a)
426 {
427 	struct tasklet_struct *list;
428 
429 	local_irq_disable();
430 	list = __get_cpu_var(tasklet_hi_vec).list;
431 	__get_cpu_var(tasklet_hi_vec).list = NULL;
432 	local_irq_enable();
433 
434 	while (list) {
435 		struct tasklet_struct *t = list;
436 
437 		list = list->next;
438 
439 		if (tasklet_trylock(t)) {
440 			if (!atomic_read(&t->count)) {
441 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
442 					BUG();
443 				t->func(t->data);
444 				tasklet_unlock(t);
445 				continue;
446 			}
447 			tasklet_unlock(t);
448 		}
449 
450 		local_irq_disable();
451 		t->next = __get_cpu_var(tasklet_hi_vec).list;
452 		__get_cpu_var(tasklet_hi_vec).list = t;
453 		__raise_softirq_irqoff(HI_SOFTIRQ);
454 		local_irq_enable();
455 	}
456 }
457 
458 
459 void tasklet_init(struct tasklet_struct *t,
460 		  void (*func)(unsigned long), unsigned long data)
461 {
462 	t->next = NULL;
463 	t->state = 0;
464 	atomic_set(&t->count, 0);
465 	t->func = func;
466 	t->data = data;
467 }
468 
469 EXPORT_SYMBOL(tasklet_init);
470 
471 void tasklet_kill(struct tasklet_struct *t)
472 {
473 	if (in_interrupt())
474 		printk("Attempt to kill tasklet from interrupt\n");
475 
476 	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
477 		do
478 			yield();
479 		while (test_bit(TASKLET_STATE_SCHED, &t->state));
480 	}
481 	tasklet_unlock_wait(t);
482 	clear_bit(TASKLET_STATE_SCHED, &t->state);
483 }
484 
485 EXPORT_SYMBOL(tasklet_kill);
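
/*
 * Teardown ordering matters for tasklet_kill(): stop whatever schedules
 * the tasklet first, otherwise it can simply be scheduled again after
 * the kill.  Hypothetical sketch of a driver remove path (foo_dev and
 * its fields are made up; the block is not built):
 */
#if 0
static void foo_remove(struct foo_dev *fd)
{
	free_irq(fd->irq, fd);		/* no further tasklet_schedule() calls */
	tasklet_kill(&fd->tasklet);	/* wait out a queued or running instance */
}
#endif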
486 
487 void __init softirq_init(void)
488 {
489 	open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
490 	open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
491 }
492 
493 static int ksoftirqd(void * __bind_cpu)
494 {
495 	set_current_state(TASK_INTERRUPTIBLE);
496 
497 	while (!kthread_should_stop()) {
498 		preempt_disable();
499 		if (!local_softirq_pending()) {
500 			preempt_enable_no_resched();
501 			schedule();
502 			preempt_disable();
503 		}
504 
505 		__set_current_state(TASK_RUNNING);
506 
507 		while (local_softirq_pending()) {
508 			/* Preempt disable stops the cpu from going offline.
509 			   If it is already offline, we are on the wrong CPU:
510 			   don't process. */
511 			if (cpu_is_offline((long)__bind_cpu))
512 				goto wait_to_die;
513 			do_softirq();
514 			preempt_enable_no_resched();
515 			cond_resched();
516 			preempt_disable();
517 		}
518 		preempt_enable();
519 		set_current_state(TASK_INTERRUPTIBLE);
520 	}
521 	__set_current_state(TASK_RUNNING);
522 	return 0;
523 
524 wait_to_die:
525 	preempt_enable();
526 	/* Wait for kthread_stop */
527 	set_current_state(TASK_INTERRUPTIBLE);
528 	while (!kthread_should_stop()) {
529 		schedule();
530 		set_current_state(TASK_INTERRUPTIBLE);
531 	}
532 	__set_current_state(TASK_RUNNING);
533 	return 0;
534 }
535 
536 #ifdef CONFIG_HOTPLUG_CPU
537 /*
538  * tasklet_kill_immediate is called to remove a tasklet which may already be
539  * scheduled for execution on @cpu.
540  *
541  * Unlike tasklet_kill, this function removes the tasklet
542  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
543  *
544  * When this function is called, @cpu must be in the CPU_DEAD state.
545  */
546 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
547 {
548 	struct tasklet_struct **i;
549 
550 	BUG_ON(cpu_online(cpu));
551 	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
552 
553 	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
554 		return;
555 
556 	/* CPU is dead, so no lock needed. */
557 	for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
558 		if (*i == t) {
559 			*i = t->next;
560 			return;
561 		}
562 	}
563 	BUG();
564 }
565 
566 static void takeover_tasklets(unsigned int cpu)
567 {
568 	struct tasklet_struct **i;
569 
570 	/* CPU is dead, so no lock needed. */
571 	local_irq_disable();
572 
573 	/* Find end, append list for that CPU. */
574 	for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
575 	*i = per_cpu(tasklet_vec, cpu).list;
576 	per_cpu(tasklet_vec, cpu).list = NULL;
577 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
578 
579 	for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
580 	*i = per_cpu(tasklet_hi_vec, cpu).list;
581 	per_cpu(tasklet_hi_vec, cpu).list = NULL;
582 	raise_softirq_irqoff(HI_SOFTIRQ);
583 
584 	local_irq_enable();
585 }
586 #endif /* CONFIG_HOTPLUG_CPU */
587 
588 static int __cpuinit cpu_callback(struct notifier_block *nfb,
589 				  unsigned long action,
590 				  void *hcpu)
591 {
592 	int hotcpu = (unsigned long)hcpu;
593 	struct task_struct *p;
594 
595 	switch (action) {
596 	case CPU_UP_PREPARE:
597 	case CPU_UP_PREPARE_FROZEN:
598 		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
599 		if (IS_ERR(p)) {
600 			printk("ksoftirqd for %i failed\n", hotcpu);
601 			return NOTIFY_BAD;
602 		}
603 		kthread_bind(p, hotcpu);
604   		per_cpu(ksoftirqd, hotcpu) = p;
605  		break;
606 	case CPU_ONLINE:
607 	case CPU_ONLINE_FROZEN:
608 		wake_up_process(per_cpu(ksoftirqd, hotcpu));
609 		break;
610 #ifdef CONFIG_HOTPLUG_CPU
611 	case CPU_UP_CANCELED:
612 	case CPU_UP_CANCELED_FROZEN:
613 		if (!per_cpu(ksoftirqd, hotcpu))
614 			break;
615 		/* Unbind so it can run.  Fall thru. */
616 		kthread_bind(per_cpu(ksoftirqd, hotcpu),
617 			     any_online_cpu(cpu_online_map));
618 	case CPU_DEAD:
619 	case CPU_DEAD_FROZEN: {
620 		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
621 
622 		p = per_cpu(ksoftirqd, hotcpu);
623 		per_cpu(ksoftirqd, hotcpu) = NULL;
624 		sched_setscheduler(p, SCHED_FIFO, &param);
625 		kthread_stop(p);
626 		takeover_tasklets(hotcpu);
627 		break;
628 	}
629 #endif /* CONFIG_HOTPLUG_CPU */
630  	}
631 	return NOTIFY_OK;
632 }
633 
634 static struct notifier_block __cpuinitdata cpu_nfb = {
635 	.notifier_call = cpu_callback
636 };
637 
638 __init int spawn_ksoftirqd(void)
639 {
640 	void *cpu = (void *)(long)smp_processor_id();
641 	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
642 
643 	BUG_ON(err == NOTIFY_BAD);
644 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
645 	register_cpu_notifier(&cpu_nfb);
646 	return 0;
647 }
648 
649 #ifdef CONFIG_SMP
650 /*
651  * Call a function on all processors (it also runs locally, with interrupts disabled)
652  */
653 int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
654 {
655 	int ret = 0;
656 
657 	preempt_disable();
658 	ret = smp_call_function(func, info, retry, wait);
659 	local_irq_disable();
660 	func(info);
661 	local_irq_enable();
662 	preempt_enable();
663 	return ret;
664 }
665 EXPORT_SYMBOL(on_each_cpu);
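
/*
 * Example use of on_each_cpu() above (hypothetical foo_* names, block
 * not built): run a function on every online CPU and wait until all of
 * them have finished.  The function runs with interrupts disabled on
 * the local CPU, as in the code above, and in IPI context on the others.
 */
#if 0
static void foo_flush_local(void *unused)
{
	/* per-cpu flush work goes here */
}

static void foo_flush_all(void)
{
	/* retry == 0: don't retry the IPI; wait == 1: block until done */
	on_each_cpu(foo_flush_local, NULL, 0, 1);
}
#endif
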
666 #endif
667