xref: /linux/kernel/softirq.c (revision 8b4a40809e5330c9da5d20107d693d92d73b31dc)
1 /*
2  *	linux/kernel/softirq.c
3  *
4  *	Copyright (C) 1992 Linus Torvalds
5  *
6  * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
7  */
8 
9 #include <linux/module.h>
10 #include <linux/kernel_stat.h>
11 #include <linux/interrupt.h>
12 #include <linux/init.h>
13 #include <linux/mm.h>
14 #include <linux/notifier.h>
15 #include <linux/percpu.h>
16 #include <linux/cpu.h>
17 #include <linux/freezer.h>
18 #include <linux/kthread.h>
19 #include <linux/rcupdate.h>
20 #include <linux/smp.h>
21 #include <linux/tick.h>
22 
23 #include <asm/irq.h>
24 /*
25    - No shared variables; all the data is CPU-local.
26    - If a softirq needs serialization, let it serialize itself
27      with its own spinlocks.
28    - Even if a softirq is serialized, only the local CPU is marked for
29      execution. Hence, we get a sort of weak CPU binding.
30      It is still not clear whether this results in better
31      locality or not.
32 
33    Examples:
34    - NET RX softirq. It is multithreaded and does not require
35      any global serialization.
36    - NET TX softirq. It kicks software netdevice queues, hence
37      it is logically serialized per device, but this serialization
38      is invisible to common code.
39    - Tasklets: each tasklet is serialized with respect to itself.
40  */
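
/*
 * A minimal sketch of the rules above, using purely illustrative names
 * (demo_lock, demo_count, demo_action): the handler does its own
 * serialization with its own spinlock and shares no state with other
 * softirq vectors.
 */
static DEFINE_SPINLOCK(demo_lock);
static unsigned long demo_count;

static void demo_action(struct softirq_action *h)
{
	spin_lock(&demo_lock);		/* the softirq's own serialization */
	demo_count++;			/* stand-in for real per-subsystem work */
	spin_unlock(&demo_lock);
}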
41 
42 #ifndef __ARCH_IRQ_STAT
43 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
44 EXPORT_SYMBOL(irq_stat);
45 #endif
46 
47 static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
48 
49 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
50 
51 /*
52  * We cannot loop indefinitely here to avoid userspace starvation,
53  * but we also don't want to introduce a worst-case 1/HZ latency
54  * to the pending events, so let the scheduler balance
55  * the softirq load for us.
56  */
57 static inline void wakeup_softirqd(void)
58 {
59 	/* Interrupts are disabled: no need to stop preemption */
60 	struct task_struct *tsk = __get_cpu_var(ksoftirqd);
61 
62 	if (tsk && tsk->state != TASK_RUNNING)
63 		wake_up_process(tsk);
64 }
65 
66 /*
67  * This one is for softirq.c-internal use,
68  * where hardirqs are disabled legitimately:
69  */
70 #ifdef CONFIG_TRACE_IRQFLAGS
71 static void __local_bh_disable(unsigned long ip)
72 {
73 	unsigned long flags;
74 
75 	WARN_ON_ONCE(in_irq());
76 
77 	raw_local_irq_save(flags);
78 	add_preempt_count(SOFTIRQ_OFFSET);
79 	/*
80 	 * Were softirqs turned off above:
81 	 */
82 	if (softirq_count() == SOFTIRQ_OFFSET)
83 		trace_softirqs_off(ip);
84 	raw_local_irq_restore(flags);
85 }
86 #else /* !CONFIG_TRACE_IRQFLAGS */
87 static inline void __local_bh_disable(unsigned long ip)
88 {
89 	add_preempt_count(SOFTIRQ_OFFSET);
90 	barrier();
91 }
92 #endif /* CONFIG_TRACE_IRQFLAGS */
93 
94 void local_bh_disable(void)
95 {
96 	__local_bh_disable((unsigned long)__builtin_return_address(0));
97 }
98 
99 EXPORT_SYMBOL(local_bh_disable);
100 
101 void __local_bh_enable(void)
102 {
103 	WARN_ON_ONCE(in_irq());
104 
105 	/*
106 	 * Softirqs should never be re-enabled by __local_bh_enable();
107 	 * it always nests inside local_bh_enable() sections:
108 	 */
109 	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
110 
111 	sub_preempt_count(SOFTIRQ_OFFSET);
112 }
113 EXPORT_SYMBOL_GPL(__local_bh_enable);
114 
115 /*
116  * Special case: softirqs can safely be enabled by
117  * cond_resched_softirq() or by __do_softirq(),
118  * without processing still-pending softirqs:
119  */
120 void _local_bh_enable(void)
121 {
122 	WARN_ON_ONCE(in_irq());
123 	WARN_ON_ONCE(!irqs_disabled());
124 
125 	if (softirq_count() == SOFTIRQ_OFFSET)
126 		trace_softirqs_on((unsigned long)__builtin_return_address(0));
127 	sub_preempt_count(SOFTIRQ_OFFSET);
128 }
129 
130 EXPORT_SYMBOL(_local_bh_enable);
131 
132 void local_bh_enable(void)
133 {
134 #ifdef CONFIG_TRACE_IRQFLAGS
135 	unsigned long flags;
136 
137 	WARN_ON_ONCE(in_irq());
138 #endif
139 	WARN_ON_ONCE(irqs_disabled());
140 
141 #ifdef CONFIG_TRACE_IRQFLAGS
142 	local_irq_save(flags);
143 #endif
144 	/*
145 	 * Are softirqs going to be turned on now?
146 	 */
147 	if (softirq_count() == SOFTIRQ_OFFSET)
148 		trace_softirqs_on((unsigned long)__builtin_return_address(0));
149 	/*
150 	 * Keep preemption disabled until we are done with
151 	 * softirq processing:
152 	 */
153 	sub_preempt_count(SOFTIRQ_OFFSET - 1);
154 
155 	if (unlikely(!in_interrupt() && local_softirq_pending()))
156 		do_softirq();
157 
158 	dec_preempt_count();
159 #ifdef CONFIG_TRACE_IRQFLAGS
160 	local_irq_restore(flags);
161 #endif
162 	preempt_check_resched();
163 }
164 EXPORT_SYMBOL(local_bh_enable);
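
/*
 * Sketch of the usual local_bh_disable()/local_bh_enable() pairing from
 * process context, guarding data that a softirq handler also touches.
 * foo_lock, foo_count and foo_update are hypothetical names; this is
 * essentially what spin_lock_bh()/spin_unlock_bh() do for you.
 */
static DEFINE_SPINLOCK(foo_lock);
static unsigned long foo_count;

static void foo_update(void)
{
	local_bh_disable();		/* keep softirqs off this CPU ... */
	spin_lock(&foo_lock);		/* ... and other CPUs' users out */
	foo_count++;
	spin_unlock(&foo_lock);
	local_bh_enable();		/* may immediately run pending softirqs */
}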
165 
166 void local_bh_enable_ip(unsigned long ip)
167 {
168 #ifdef CONFIG_TRACE_IRQFLAGS
169 	unsigned long flags;
170 
171 	WARN_ON_ONCE(in_irq());
172 
173 	local_irq_save(flags);
174 #endif
175 	/*
176 	 * Are softirqs going to be turned on now?
177 	 */
178 	if (softirq_count() == SOFTIRQ_OFFSET)
179 		trace_softirqs_on(ip);
180 	/*
181 	 * Keep preemption disabled until we are done with
182 	 * softirq processing:
183 	 */
184 	sub_preempt_count(SOFTIRQ_OFFSET - 1);
185 
186 	if (unlikely(!in_interrupt() && local_softirq_pending()))
187 		do_softirq();
188 
189 	dec_preempt_count();
190 #ifdef CONFIG_TRACE_IRQFLAGS
191 	local_irq_restore(flags);
192 #endif
193 	preempt_check_resched();
194 }
195 EXPORT_SYMBOL(local_bh_enable_ip);
196 
197 /*
198  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
199  * and we fall back to ksoftirqd after that.
200  *
201  * This number has been established via experimentation.
202  * The two things to balance are latency and fairness:
203  * we want to handle softirqs as soon as possible, but they
204  * should not be able to lock up the box.
205  */
206 #define MAX_SOFTIRQ_RESTART 10
207 
208 asmlinkage void __do_softirq(void)
209 {
210 	struct softirq_action *h;
211 	__u32 pending;
212 	int max_restart = MAX_SOFTIRQ_RESTART;
213 	int cpu;
214 
215 	pending = local_softirq_pending();
216 	account_system_vtime(current);
217 
218 	__local_bh_disable((unsigned long)__builtin_return_address(0));
219 	trace_softirq_enter();
220 
221 	cpu = smp_processor_id();
222 restart:
223 	/* Reset the pending bitmask before enabling irqs */
224 	set_softirq_pending(0);
225 
226 	local_irq_enable();
227 
228 	h = softirq_vec;
229 
230 	do {
231 		if (pending & 1) {
232 			h->action(h);
233 			rcu_bh_qsctr_inc(cpu);
234 		}
235 		h++;
236 		pending >>= 1;
237 	} while (pending);
238 
239 	local_irq_disable();
240 
241 	pending = local_softirq_pending();
242 	if (pending && --max_restart)
243 		goto restart;
244 
245 	if (pending)
246 		wakeup_softirqd();
247 
248 	trace_softirq_exit();
249 
250 	account_system_vtime(current);
251 	_local_bh_enable();
252 }
253 
254 #ifndef __ARCH_HAS_DO_SOFTIRQ
255 
256 asmlinkage void do_softirq(void)
257 {
258 	__u32 pending;
259 	unsigned long flags;
260 
261 	if (in_interrupt())
262 		return;
263 
264 	local_irq_save(flags);
265 
266 	pending = local_softirq_pending();
267 
268 	if (pending)
269 		__do_softirq();
270 
271 	local_irq_restore(flags);
272 }
273 
274 EXPORT_SYMBOL(do_softirq);
275 
276 #endif
277 
278 /*
279  * Enter an interrupt context.
280  */
281 void irq_enter(void)
282 {
283 	__irq_enter();
284 #ifdef CONFIG_NO_HZ
285 	if (idle_cpu(smp_processor_id()))
286 		tick_nohz_update_jiffies();
287 #endif
288 }
289 
290 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
291 # define invoke_softirq()	__do_softirq()
292 #else
293 # define invoke_softirq()	do_softirq()
294 #endif
295 
296 /*
297  * Exit an interrupt context. Process softirqs if needed and possible:
298  */
299 void irq_exit(void)
300 {
301 	account_system_vtime(current);
302 	trace_hardirq_exit();
303 	sub_preempt_count(IRQ_EXIT_OFFSET);
304 	if (!in_interrupt() && local_softirq_pending())
305 		invoke_softirq();
306 
307 #ifdef CONFIG_NO_HZ
308 	/* Make sure that timer wheel updates are propagated */
309 	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
310 		tick_nohz_stop_sched_tick();
311 #endif
312 	preempt_enable_no_resched();
313 }
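
/*
 * Sketch of how architecture interrupt entry code typically brackets a
 * hardirq handler with irq_enter()/irq_exit(); the dispatch step itself
 * is arch-specific and only hinted at here.
 */
static void irq_flow_sketch(void)
{
	irq_enter();
	/* ... arch code looks up and runs the hardirq handler here ... */
	irq_exit();		/* may run pending softirqs via invoke_softirq() */
}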
314 
315 /*
316  * This function must run with irqs disabled!
317  */
318 inline fastcall void raise_softirq_irqoff(unsigned int nr)
319 {
320 	__raise_softirq_irqoff(nr);
321 
322 	/*
323 	 * If we're in an interrupt or softirq, we're done
324 	 * (this also catches softirq-disabled code). We will
325 	 * actually run the softirq once we return from
326 	 * the irq or softirq.
327 	 *
328 	 * Otherwise we wake up ksoftirqd to make sure we
329 	 * schedule the softirq soon.
330 	 */
331 	if (!in_interrupt())
332 		wakeup_softirqd();
333 }
334 
335 EXPORT_SYMBOL(raise_softirq_irqoff);
336 
337 void fastcall raise_softirq(unsigned int nr)
338 {
339 	unsigned long flags;
340 
341 	local_irq_save(flags);
342 	raise_softirq_irqoff(nr);
343 	local_irq_restore(flags);
344 }
345 
346 void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
347 {
348 	softirq_vec[nr].data = data;
349 	softirq_vec[nr].action = action;
350 }
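
/*
 * Sketch of registering and raising a softirq. It assumes a hypothetical
 * EXAMPLE_SOFTIRQ entry has been added to the softirq enum in
 * <linux/interrupt.h>; example_action(), example_init() and
 * example_kick() are illustrative names only.
 */
static void example_action(struct softirq_action *h)
{
	/* Runs in softirq context: hardirqs enabled, softirqs disabled,
	   always on the CPU that raised EXAMPLE_SOFTIRQ. */
}

static int __init example_init(void)
{
	open_softirq(EXAMPLE_SOFTIRQ, example_action, NULL);
	return 0;
}

/* Typically called from a hardirq handler to defer the heavy lifting: */
static void example_kick(void)
{
	raise_softirq(EXAMPLE_SOFTIRQ);
}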
351 
352 /* Tasklets */
353 struct tasklet_head
354 {
355 	struct tasklet_struct *list;
356 };
357 
358 /* Some compilers disobey section attribute on statics when not
359    initialized -- RR */
360 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
361 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
362 
363 void fastcall __tasklet_schedule(struct tasklet_struct *t)
364 {
365 	unsigned long flags;
366 
367 	local_irq_save(flags);
368 	t->next = __get_cpu_var(tasklet_vec).list;
369 	__get_cpu_var(tasklet_vec).list = t;
370 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
371 	local_irq_restore(flags);
372 }
373 
374 EXPORT_SYMBOL(__tasklet_schedule);
375 
376 void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
377 {
378 	unsigned long flags;
379 
380 	local_irq_save(flags);
381 	t->next = __get_cpu_var(tasklet_hi_vec).list;
382 	__get_cpu_var(tasklet_hi_vec).list = t;
383 	raise_softirq_irqoff(HI_SOFTIRQ);
384 	local_irq_restore(flags);
385 }
386 
387 EXPORT_SYMBOL(__tasklet_hi_schedule);
388 
389 static void tasklet_action(struct softirq_action *a)
390 {
391 	struct tasklet_struct *list;
392 
393 	local_irq_disable();
394 	list = __get_cpu_var(tasklet_vec).list;
395 	__get_cpu_var(tasklet_vec).list = NULL;
396 	local_irq_enable();
397 
398 	while (list) {
399 		struct tasklet_struct *t = list;
400 
401 		list = list->next;
402 
403 		if (tasklet_trylock(t)) {
404 			if (!atomic_read(&t->count)) {
405 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
406 					BUG();
407 				t->func(t->data);
408 				tasklet_unlock(t);
409 				continue;
410 			}
411 			tasklet_unlock(t);
412 		}
413 
414 		local_irq_disable();
415 		t->next = __get_cpu_var(tasklet_vec).list;
416 		__get_cpu_var(tasklet_vec).list = t;
417 		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
418 		local_irq_enable();
419 	}
420 }
421 
422 static void tasklet_hi_action(struct softirq_action *a)
423 {
424 	struct tasklet_struct *list;
425 
426 	local_irq_disable();
427 	list = __get_cpu_var(tasklet_hi_vec).list;
428 	__get_cpu_var(tasklet_hi_vec).list = NULL;
429 	local_irq_enable();
430 
431 	while (list) {
432 		struct tasklet_struct *t = list;
433 
434 		list = list->next;
435 
436 		if (tasklet_trylock(t)) {
437 			if (!atomic_read(&t->count)) {
438 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
439 					BUG();
440 				t->func(t->data);
441 				tasklet_unlock(t);
442 				continue;
443 			}
444 			tasklet_unlock(t);
445 		}
446 
447 		local_irq_disable();
448 		t->next = __get_cpu_var(tasklet_hi_vec).list;
449 		__get_cpu_var(tasklet_hi_vec).list = t;
450 		__raise_softirq_irqoff(HI_SOFTIRQ);
451 		local_irq_enable();
452 	}
453 }
454 
455 
456 void tasklet_init(struct tasklet_struct *t,
457 		  void (*func)(unsigned long), unsigned long data)
458 {
459 	t->next = NULL;
460 	t->state = 0;
461 	atomic_set(&t->count, 0);
462 	t->func = func;
463 	t->data = data;
464 }
465 
466 EXPORT_SYMBOL(tasklet_init);
467 
468 void tasklet_kill(struct tasklet_struct *t)
469 {
470 	if (in_interrupt())
471 		printk("Attempt to kill tasklet from interrupt\n");
472 
473 	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
474 		do
475 			yield();
476 		while (test_bit(TASKLET_STATE_SCHED, &t->state));
477 	}
478 	tasklet_unlock_wait(t);
479 	clear_bit(TASKLET_STATE_SCHED, &t->state);
480 }
481 
482 EXPORT_SYMBOL(tasklet_kill);
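
/*
 * Sketch of the normal tasklet life cycle with hypothetical names
 * (sample_tasklet, sample_tasklet_fn, sample_*): set it up once, schedule
 * it from interrupt context, and kill it before its owner goes away.
 * DECLARE_TASKLET() can replace tasklet_init() for static tasklets.
 */
static void sample_tasklet_fn(unsigned long data)
{
	/* Runs in softirq context, serialized against itself. */
}

static struct tasklet_struct sample_tasklet;

static void sample_setup(void)
{
	tasklet_init(&sample_tasklet, sample_tasklet_fn, 0);
}

/* e.g. from a device's hardirq handler: */
static void sample_kick(void)
{
	tasklet_schedule(&sample_tasklet);
}

static void sample_teardown(void)
{
	tasklet_kill(&sample_tasklet);	/* waits for a running instance to finish */
}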
483 
484 void __init softirq_init(void)
485 {
486 	open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
487 	open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
488 }
489 
490 static int ksoftirqd(void * __bind_cpu)
491 {
492 	set_current_state(TASK_INTERRUPTIBLE);
493 
494 	while (!kthread_should_stop()) {
495 		preempt_disable();
496 		if (!local_softirq_pending()) {
497 			preempt_enable_no_resched();
498 			schedule();
499 			preempt_disable();
500 		}
501 
502 		__set_current_state(TASK_RUNNING);
503 
504 		while (local_softirq_pending()) {
505 			/* Disabling preemption stops the CPU from going offline.
506 			   If it is already offline, we are on the wrong CPU:
507 			   don't process. */
508 			if (cpu_is_offline((long)__bind_cpu))
509 				goto wait_to_die;
510 			do_softirq();
511 			preempt_enable_no_resched();
512 			cond_resched();
513 			preempt_disable();
514 		}
515 		preempt_enable();
516 		set_current_state(TASK_INTERRUPTIBLE);
517 	}
518 	__set_current_state(TASK_RUNNING);
519 	return 0;
520 
521 wait_to_die:
522 	preempt_enable();
523 	/* Wait for kthread_stop */
524 	set_current_state(TASK_INTERRUPTIBLE);
525 	while (!kthread_should_stop()) {
526 		schedule();
527 		set_current_state(TASK_INTERRUPTIBLE);
528 	}
529 	__set_current_state(TASK_RUNNING);
530 	return 0;
531 }
532 
533 #ifdef CONFIG_HOTPLUG_CPU
534 /*
535  * tasklet_kill_immediate is called to remove a tasklet which may already be
536  * scheduled for execution on @cpu.
537  *
538  * Unlike tasklet_kill, this function removes the tasklet
539  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
540  *
541  * When this function is called, @cpu must be in the CPU_DEAD state.
542  */
543 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
544 {
545 	struct tasklet_struct **i;
546 
547 	BUG_ON(cpu_online(cpu));
548 	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
549 
550 	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
551 		return;
552 
553 	/* CPU is dead, so no lock needed. */
554 	for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
555 		if (*i == t) {
556 			*i = t->next;
557 			return;
558 		}
559 	}
560 	BUG();
561 }
562 
563 static void takeover_tasklets(unsigned int cpu)
564 {
565 	struct tasklet_struct **i;
566 
567 	/* CPU is dead, so no lock needed. */
568 	local_irq_disable();
569 
570 	/* Find end, append list for that CPU. */
571 	for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
572 	*i = per_cpu(tasklet_vec, cpu).list;
573 	per_cpu(tasklet_vec, cpu).list = NULL;
574 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
575 
576 	for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
577 	*i = per_cpu(tasklet_hi_vec, cpu).list;
578 	per_cpu(tasklet_hi_vec, cpu).list = NULL;
579 	raise_softirq_irqoff(HI_SOFTIRQ);
580 
581 	local_irq_enable();
582 }
583 #endif /* CONFIG_HOTPLUG_CPU */
584 
585 static int __cpuinit cpu_callback(struct notifier_block *nfb,
586 				  unsigned long action,
587 				  void *hcpu)
588 {
589 	int hotcpu = (unsigned long)hcpu;
590 	struct task_struct *p;
591 
592 	switch (action) {
593 	case CPU_UP_PREPARE:
594 	case CPU_UP_PREPARE_FROZEN:
595 		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
596 		if (IS_ERR(p)) {
597 			printk("ksoftirqd for %i failed\n", hotcpu);
598 			return NOTIFY_BAD;
599 		}
600 		kthread_bind(p, hotcpu);
601 		per_cpu(ksoftirqd, hotcpu) = p;
602 		break;
603 	case CPU_ONLINE:
604 	case CPU_ONLINE_FROZEN:
605 		wake_up_process(per_cpu(ksoftirqd, hotcpu));
606 		break;
607 #ifdef CONFIG_HOTPLUG_CPU
608 	case CPU_UP_CANCELED:
609 	case CPU_UP_CANCELED_FROZEN:
610 		if (!per_cpu(ksoftirqd, hotcpu))
611 			break;
612 		/* Unbind so it can run.  Fall thru. */
613 		kthread_bind(per_cpu(ksoftirqd, hotcpu),
614 			     any_online_cpu(cpu_online_map));
615 	case CPU_DEAD:
616 	case CPU_DEAD_FROZEN: {
617 		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
618 
619 		p = per_cpu(ksoftirqd, hotcpu);
620 		per_cpu(ksoftirqd, hotcpu) = NULL;
621 		sched_setscheduler(p, SCHED_FIFO, &param);
622 		kthread_stop(p);
623 		takeover_tasklets(hotcpu);
624 		break;
625 	}
626 #endif /* CONFIG_HOTPLUG_CPU */
627  	}
628 	return NOTIFY_OK;
629 }
630 
631 static struct notifier_block __cpuinitdata cpu_nfb = {
632 	.notifier_call = cpu_callback
633 };
634 
635 __init int spawn_ksoftirqd(void)
636 {
637 	void *cpu = (void *)(long)smp_processor_id();
638 	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
639 
640 	BUG_ON(err == NOTIFY_BAD);
641 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
642 	register_cpu_notifier(&cpu_nfb);
643 	return 0;
644 }
645 
646 #ifdef CONFIG_SMP
647 /*
648  * Call a function on all processors
649  */
650 int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
651 {
652 	int ret = 0;
653 
654 	preempt_disable();
655 	ret = smp_call_function(func, info, retry, wait);
656 	local_irq_disable();
657 	func(info);
658 	local_irq_enable();
659 	preempt_enable();
660 	return ret;
661 }
662 EXPORT_SYMBOL(on_each_cpu);
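
/*
 * Sketch of a typical on_each_cpu() call; stats_reset_one() and
 * stats_reset_all() are hypothetical. The callback must not sleep: it
 * runs with interrupts disabled locally and from IPI context on the
 * other CPUs. With wait == 1 the caller returns only after every CPU
 * has run the function.
 */
static void stats_reset_one(void *info)
{
	/* reset this CPU's private counters here */
}

static void stats_reset_all(void)
{
	on_each_cpu(stats_reset_one, NULL, 0, 1);	/* retry = 0, wait = 1 */
}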
663 #endif
664