xref: /linux/kernel/softirq.c (revision 6e8331ac6973435b1e7604c30f2ad394035b46e1)
1 /*
2  *	linux/kernel/softirq.c
3  *
4  *	Copyright (C) 1992 Linus Torvalds
5  *
6  * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
7  */
8 
9 #include <linux/module.h>
10 #include <linux/kernel_stat.h>
11 #include <linux/interrupt.h>
12 #include <linux/init.h>
13 #include <linux/mm.h>
14 #include <linux/notifier.h>
15 #include <linux/percpu.h>
16 #include <linux/cpu.h>
17 #include <linux/kthread.h>
18 #include <linux/rcupdate.h>
19 #include <linux/smp.h>
20 
21 #include <asm/irq.h>
22 /*
23    - No shared variables; all data are CPU-local.
24    - If a softirq needs serialization, let it serialize itself
25      by its own spinlocks (see the sketch after this comment).
26    - Even if a softirq is serialized, only the local CPU is marked
27      for execution, so we get a sort of weak CPU binding.  It is
28      still not clear whether this results in better locality
29      or not.
30 
31    Examples:
32    - NET RX softirq. It is multithreaded and does not require
33      any global serialization.
34    - NET TX softirq. It kicks software netdevice queues, hence
35      it is logically serialized per device, but this serialization
36      is invisible to common code.
37    - Tasklets: serialized with respect to themselves.
38  */
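/*
 * Illustrative sketch, not part of the original file (my_lock and
 * my_softirq_action are hypothetical names): a softirq handler may run
 * concurrently on several CPUs, so any state shared across CPUs must
 * be protected by the handler's own lock:
 *
 *	static DEFINE_SPINLOCK(my_lock);
 *
 *	static void my_softirq_action(struct softirq_action *a)
 *	{
 *		spin_lock(&my_lock);
 *		... touch state shared between CPUs ...
 *		spin_unlock(&my_lock);
 *	}
 *
 * Purely per-CPU data, the common case, needs no locking at all.
 */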
39 
40 #ifndef __ARCH_IRQ_STAT
41 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
42 EXPORT_SYMBOL(irq_stat);
43 #endif
44 
45 static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
46 
47 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
48 
49 /*
50  * We cannot loop here indefinitely without starving userspace,
51  * but we also don't want to introduce a worst-case 1/HZ latency
52  * for the pending events, so let the scheduler balance
53  * the softirq load for us.
54  */
55 static inline void wakeup_softirqd(void)
56 {
57 	/* Interrupts are disabled: no need to stop preemption */
58 	struct task_struct *tsk = __get_cpu_var(ksoftirqd);
59 
60 	if (tsk && tsk->state != TASK_RUNNING)
61 		wake_up_process(tsk);
62 }
63 
64 /*
65  * This one is for softirq.c-internal use,
66  * where hardirqs are disabled legitimately:
67  */
68 #ifdef CONFIG_TRACE_IRQFLAGS
69 static void __local_bh_disable(unsigned long ip)
70 {
71 	unsigned long flags;
72 
73 	WARN_ON_ONCE(in_irq());
74 
75 	raw_local_irq_save(flags);
76 	add_preempt_count(SOFTIRQ_OFFSET);
77 	/*
78 	 * Were softirqs turned off above:
79 	 */
80 	if (softirq_count() == SOFTIRQ_OFFSET)
81 		trace_softirqs_off(ip);
82 	raw_local_irq_restore(flags);
83 }
84 #else /* !CONFIG_TRACE_IRQFLAGS */
85 static inline void __local_bh_disable(unsigned long ip)
86 {
87 	add_preempt_count(SOFTIRQ_OFFSET);
88 	barrier();
89 }
90 #endif /* CONFIG_TRACE_IRQFLAGS */
91 
92 void local_bh_disable(void)
93 {
94 	__local_bh_disable((unsigned long)__builtin_return_address(0));
95 }
96 
97 EXPORT_SYMBOL(local_bh_disable);
98 
99 void __local_bh_enable(void)
100 {
101 	WARN_ON_ONCE(in_irq());
102 
103 	/*
104 	 * softirqs should never be enabled by __local_bh_enable();
105 	 * it always nests inside local_bh_enable() sections:
106 	 */
107 	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
108 
109 	sub_preempt_count(SOFTIRQ_OFFSET);
110 }
111 EXPORT_SYMBOL_GPL(__local_bh_enable);
112 
113 /*
114  * Special-case - softirqs can safely be enabled in
115  * cond_resched_softirq(), or by __do_softirq(),
116  * without processing still-pending softirqs:
117  */
118 void _local_bh_enable(void)
119 {
120 	WARN_ON_ONCE(in_irq());
121 	WARN_ON_ONCE(!irqs_disabled());
122 
123 	if (softirq_count() == SOFTIRQ_OFFSET)
124 		trace_softirqs_on((unsigned long)__builtin_return_address(0));
125 	sub_preempt_count(SOFTIRQ_OFFSET);
126 }
127 
128 EXPORT_SYMBOL(_local_bh_enable);
129 
130 void local_bh_enable(void)
131 {
132 #ifdef CONFIG_TRACE_IRQFLAGS
133 	unsigned long flags;
134 
135 	WARN_ON_ONCE(in_irq());
136 #endif
137 	WARN_ON_ONCE(irqs_disabled());
138 
139 #ifdef CONFIG_TRACE_IRQFLAGS
140 	local_irq_save(flags);
141 #endif
142 	/*
143 	 * Are softirqs going to be turned on now:
144 	 */
145 	if (softirq_count() == SOFTIRQ_OFFSET)
146 		trace_softirqs_on((unsigned long)__builtin_return_address(0));
147 	/*
148 	 * Keep preemption disabled until we are done with
149 	 * softirq processing:
150  	 */
151  	sub_preempt_count(SOFTIRQ_OFFSET - 1);
152 
153 	if (unlikely(!in_interrupt() && local_softirq_pending()))
154 		do_softirq();
155 
156 	dec_preempt_count();
157 #ifdef CONFIG_TRACE_IRQFLAGS
158 	local_irq_restore(flags);
159 #endif
160 	preempt_check_resched();
161 }
162 EXPORT_SYMBOL(local_bh_enable);
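
/*
 * Typical usage, shown only as an illustrative sketch: process-context
 * code that shares data with a softirq handler on the local CPU
 * brackets the critical section with the pair above:
 *
 *	local_bh_disable();
 *	... touch data also used by a softirq/tasklet on this CPU ...
 *	local_bh_enable();
 *
 * This only stops softirqs on the local CPU; data reachable from other
 * CPUs still needs its own lock.  On return from local_bh_enable(),
 * softirqs raised in between are run unless we are already in
 * interrupt context.
 */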
163 
164 void local_bh_enable_ip(unsigned long ip)
165 {
166 #ifdef CONFIG_TRACE_IRQFLAGS
167 	unsigned long flags;
168 
169 	WARN_ON_ONCE(in_irq());
170 
171 	local_irq_save(flags);
172 #endif
173 	/*
174 	 * Are softirqs going to be turned on now:
175 	 */
176 	if (softirq_count() == SOFTIRQ_OFFSET)
177 		trace_softirqs_on(ip);
178 	/*
179 	 * Keep preemption disabled until we are done with
180 	 * softirq processing:
181  	 */
182  	sub_preempt_count(SOFTIRQ_OFFSET - 1);
183 
184 	if (unlikely(!in_interrupt() && local_softirq_pending()))
185 		do_softirq();
186 
187 	dec_preempt_count();
188 #ifdef CONFIG_TRACE_IRQFLAGS
189 	local_irq_restore(flags);
190 #endif
191 	preempt_check_resched();
192 }
193 EXPORT_SYMBOL(local_bh_enable_ip);
194 
195 /*
196  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
197  * and we fall back to ksoftirqd after that.
198  *
199  * This number has been established via experimentation.
200  * The two things to balance are latency and fairness -
201  * we want to handle softirqs as soon as possible, but they
202  * should not be able to lock up the box.
203  */
204 #define MAX_SOFTIRQ_RESTART 10
205 
206 asmlinkage void __do_softirq(void)
207 {
208 	struct softirq_action *h;
209 	__u32 pending;
210 	int max_restart = MAX_SOFTIRQ_RESTART;
211 	int cpu;
212 
213 	pending = local_softirq_pending();
214 	account_system_vtime(current);
215 
216 	__local_bh_disable((unsigned long)__builtin_return_address(0));
217 	trace_softirq_enter();
218 
219 	cpu = smp_processor_id();
220 restart:
221 	/* Reset the pending bitmask before enabling irqs */
222 	set_softirq_pending(0);
223 
224 	local_irq_enable();
225 
226 	h = softirq_vec;
227 
228 	do {
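	/*
	 * Walk the snapshot of the pending bitmask taken above bit by
	 * bit and run each raised handler.  Hardirqs are enabled while
	 * the handlers run; bottom halves stay disabled throughout.
	 */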
229 		if (pending & 1) {
230 			h->action(h);
231 			rcu_bh_qsctr_inc(cpu);
232 		}
233 		h++;
234 		pending >>= 1;
235 	} while (pending);
236 
237 	local_irq_disable();
238 
239 	pending = local_softirq_pending();
240 	if (pending && --max_restart)
241 		goto restart;
242 
243 	if (pending)
244 		wakeup_softirqd();
245 
246 	trace_softirq_exit();
247 
248 	account_system_vtime(current);
249 	_local_bh_enable();
250 }
251 
252 #ifndef __ARCH_HAS_DO_SOFTIRQ
253 
254 asmlinkage void do_softirq(void)
255 {
256 	__u32 pending;
257 	unsigned long flags;
258 
259 	if (in_interrupt())
260 		return;
261 
262 	local_irq_save(flags);
263 
264 	pending = local_softirq_pending();
265 
266 	if (pending)
267 		__do_softirq();
268 
269 	local_irq_restore(flags);
270 }
271 
272 EXPORT_SYMBOL(do_softirq);
273 
274 #endif
275 
276 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
277 # define invoke_softirq()	__do_softirq()
278 #else
279 # define invoke_softirq()	do_softirq()
280 #endif
281 
282 /*
283  * Exit an interrupt context. Process softirqs if needed and possible:
284  */
285 void irq_exit(void)
286 {
287 	account_system_vtime(current);
288 	trace_hardirq_exit();
289 	sub_preempt_count(IRQ_EXIT_OFFSET);
290 	if (!in_interrupt() && local_softirq_pending())
291 		invoke_softirq();
292 	preempt_enable_no_resched();
293 }
294 
295 /*
296  * This function must run with irqs disabled!
297  */
298 inline fastcall void raise_softirq_irqoff(unsigned int nr)
299 {
300 	__raise_softirq_irqoff(nr);
301 
302 	/*
303 	 * If we're in an interrupt or softirq, we're done
304 	 * (this also catches softirq-disabled code). We will
305 	 * actually run the softirq once we return from
306 	 * the irq or softirq.
307 	 *
308 	 * Otherwise we wake up ksoftirqd to make sure we
309 	 * schedule the softirq soon.
310 	 */
311 	if (!in_interrupt())
312 		wakeup_softirqd();
313 }
314 
315 EXPORT_SYMBOL(raise_softirq_irqoff);
316 
317 void fastcall raise_softirq(unsigned int nr)
318 {
319 	unsigned long flags;
320 
321 	local_irq_save(flags);
322 	raise_softirq_irqoff(nr);
323 	local_irq_restore(flags);
324 }
325 
326 void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
327 {
328 	softirq_vec[nr].data = data;
329 	softirq_vec[nr].action = action;
330 }
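
/*
 * Usage sketch (illustrative only; MY_SOFTIRQ and my_softirq_action
 * are hypothetical): a subsystem registers its handler once at init
 * time and raises the softirq whenever there is work, as
 * softirq_init() below does for the tasklet softirqs:
 *
 *	open_softirq(MY_SOFTIRQ, my_softirq_action, NULL);
 *	...
 *	raise_softirq(MY_SOFTIRQ);
 *
 * Real users occupy fixed slots in the softirq enumeration
 * (HI_SOFTIRQ, NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, TASKLET_SOFTIRQ, ...).
 */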
331 
332 /* Tasklets */
333 struct tasklet_head
334 {
335 	struct tasklet_struct *list;
336 };
337 
338 /* Some compilers disobey the section attribute on statics when they
339    are not explicitly initialized -- RR */
340 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
341 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
342 
343 void fastcall __tasklet_schedule(struct tasklet_struct *t)
344 {
345 	unsigned long flags;
346 
347 	local_irq_save(flags);
348 	t->next = __get_cpu_var(tasklet_vec).list;
349 	__get_cpu_var(tasklet_vec).list = t;
350 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
351 	local_irq_restore(flags);
352 }
353 
354 EXPORT_SYMBOL(__tasklet_schedule);
355 
356 void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
357 {
358 	unsigned long flags;
359 
360 	local_irq_save(flags);
361 	t->next = __get_cpu_var(tasklet_hi_vec).list;
362 	__get_cpu_var(tasklet_hi_vec).list = t;
363 	raise_softirq_irqoff(HI_SOFTIRQ);
364 	local_irq_restore(flags);
365 }
366 
367 EXPORT_SYMBOL(__tasklet_hi_schedule);
368 
369 static void tasklet_action(struct softirq_action *a)
370 {
371 	struct tasklet_struct *list;
372 
373 	local_irq_disable();
374 	list = __get_cpu_var(tasklet_vec).list;
375 	__get_cpu_var(tasklet_vec).list = NULL;
376 	local_irq_enable();
377 
378 	while (list) {
379 		struct tasklet_struct *t = list;
380 
381 		list = list->next;
382 
383 		if (tasklet_trylock(t)) {
384 			if (!atomic_read(&t->count)) {
385 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
386 					BUG();
387 				t->func(t->data);
388 				tasklet_unlock(t);
389 				continue;
390 			}
391 			tasklet_unlock(t);
392 		}
393 
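		/*
		 * Either the tasklet is disabled (count != 0) or it is
		 * already running on another CPU: put it back on this
		 * CPU's list and re-raise the softirq to retry later.
		 */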
394 		local_irq_disable();
395 		t->next = __get_cpu_var(tasklet_vec).list;
396 		__get_cpu_var(tasklet_vec).list = t;
397 		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
398 		local_irq_enable();
399 	}
400 }
401 
402 static void tasklet_hi_action(struct softirq_action *a)
403 {
404 	struct tasklet_struct *list;
405 
406 	local_irq_disable();
407 	list = __get_cpu_var(tasklet_hi_vec).list;
408 	__get_cpu_var(tasklet_hi_vec).list = NULL;
409 	local_irq_enable();
410 
411 	while (list) {
412 		struct tasklet_struct *t = list;
413 
414 		list = list->next;
415 
416 		if (tasklet_trylock(t)) {
417 			if (!atomic_read(&t->count)) {
418 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
419 					BUG();
420 				t->func(t->data);
421 				tasklet_unlock(t);
422 				continue;
423 			}
424 			tasklet_unlock(t);
425 		}
426 
427 		local_irq_disable();
428 		t->next = __get_cpu_var(tasklet_hi_vec).list;
429 		__get_cpu_var(tasklet_hi_vec).list = t;
430 		__raise_softirq_irqoff(HI_SOFTIRQ);
431 		local_irq_enable();
432 	}
433 }
434 
435 
436 void tasklet_init(struct tasklet_struct *t,
437 		  void (*func)(unsigned long), unsigned long data)
438 {
439 	t->next = NULL;
440 	t->state = 0;
441 	atomic_set(&t->count, 0);
442 	t->func = func;
443 	t->data = data;
444 }
445 
446 EXPORT_SYMBOL(tasklet_init);
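
/*
 * Usage sketch (illustrative only; my_tasklet and my_func are
 * hypothetical names):
 *
 *	static void my_func(unsigned long data);
 *	static DECLARE_TASKLET(my_tasklet, my_func, 0);
 *
 *	... later, e.g. from an interrupt handler ...
 *	tasklet_schedule(&my_tasklet);
 *
 * tasklet_schedule() (in <linux/interrupt.h>) sets TASKLET_STATE_SCHED
 * and calls __tasklet_schedule() above only if the tasklet was not
 * already scheduled, so concurrent schedules collapse into one run.
 */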
447 
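/*
 * tasklet_kill - wait until a tasklet is neither scheduled nor running.
 * It may yield while waiting, so it must not be called from interrupt
 * context, and the caller must ensure the tasklet does not get
 * rescheduled afterwards (typical use is module unload).
 */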
448 void tasklet_kill(struct tasklet_struct *t)
449 {
450 	if (in_interrupt())
451 		printk("Attempt to kill tasklet from interrupt\n");
452 
453 	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
454 		do
455 			yield();
456 		while (test_bit(TASKLET_STATE_SCHED, &t->state));
457 	}
458 	tasklet_unlock_wait(t);
459 	clear_bit(TASKLET_STATE_SCHED, &t->state);
460 }
461 
462 EXPORT_SYMBOL(tasklet_kill);
463 
464 void __init softirq_init(void)
465 {
466 	open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
467 	open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
468 }
469 
470 static int ksoftirqd(void * __bind_cpu)
471 {
472 	set_user_nice(current, 19);
473 	current->flags |= PF_NOFREEZE;
474 
475 	set_current_state(TASK_INTERRUPTIBLE);
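	/*
	 * Note the ordering: we mark ourselves TASK_INTERRUPTIBLE before
	 * testing local_softirq_pending() below, so a wakeup_softirqd()
	 * that slips in between simply sets us back to TASK_RUNNING and
	 * the subsequent schedule() will not block.
	 */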
476 
477 	while (!kthread_should_stop()) {
478 		preempt_disable();
479 		if (!local_softirq_pending()) {
480 			preempt_enable_no_resched();
481 			schedule();
482 			preempt_disable();
483 		}
484 
485 		__set_current_state(TASK_RUNNING);
486 
487 		while (local_softirq_pending()) {
488 			/* Disabling preemption stops this CPU from going
489 			   offline.  If it is already offline, we are on
490 			   the wrong CPU: don't process. */
491 			if (cpu_is_offline((long)__bind_cpu))
492 				goto wait_to_die;
493 			do_softirq();
494 			preempt_enable_no_resched();
495 			cond_resched();
496 			preempt_disable();
497 		}
498 		preempt_enable();
499 		set_current_state(TASK_INTERRUPTIBLE);
500 	}
501 	__set_current_state(TASK_RUNNING);
502 	return 0;
503 
504 wait_to_die:
505 	preempt_enable();
506 	/* Wait for kthread_stop */
507 	set_current_state(TASK_INTERRUPTIBLE);
508 	while (!kthread_should_stop()) {
509 		schedule();
510 		set_current_state(TASK_INTERRUPTIBLE);
511 	}
512 	__set_current_state(TASK_RUNNING);
513 	return 0;
514 }
515 
516 #ifdef CONFIG_HOTPLUG_CPU
517 /*
518  * tasklet_kill_immediate is called to remove a tasklet which may already be
519  * scheduled for execution on @cpu.
520  *
521  * Unlike tasklet_kill, this function removes the tasklet
522  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
523  *
524  * When this function is called, @cpu must be in the CPU_DEAD state.
525  */
526 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
527 {
528 	struct tasklet_struct **i;
529 
530 	BUG_ON(cpu_online(cpu));
531 	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
532 
533 	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
534 		return;
535 
536 	/* CPU is dead, so no lock needed. */
537 	for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
538 		if (*i == t) {
539 			*i = t->next;
540 			return;
541 		}
542 	}
543 	BUG();
544 }
545 
546 static void takeover_tasklets(unsigned int cpu)
547 {
548 	struct tasklet_struct **i;
549 
550 	/* CPU is dead, so no lock needed. */
551 	local_irq_disable();
552 
553 	/* Find end, append list for that CPU. */
554 	for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
555 	*i = per_cpu(tasklet_vec, cpu).list;
556 	per_cpu(tasklet_vec, cpu).list = NULL;
557 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
558 
559 	for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
560 	*i = per_cpu(tasklet_hi_vec, cpu).list;
561 	per_cpu(tasklet_hi_vec, cpu).list = NULL;
562 	raise_softirq_irqoff(HI_SOFTIRQ);
563 
564 	local_irq_enable();
565 }
566 #endif /* CONFIG_HOTPLUG_CPU */
567 
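/*
 * CPU hotplug notifier: create and bind a ksoftirqd thread while a CPU
 * is being brought up, wake it once the CPU is online, and on CPU
 * removal stop the thread and take over any tasklets still queued on
 * the dead CPU.
 */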
568 static int __cpuinit cpu_callback(struct notifier_block *nfb,
569 				  unsigned long action,
570 				  void *hcpu)
571 {
572 	int hotcpu = (unsigned long)hcpu;
573 	struct task_struct *p;
574 
575 	switch (action) {
576 	case CPU_UP_PREPARE:
577 		BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
578 		BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
579 		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
580 		if (IS_ERR(p)) {
581 			printk("ksoftirqd for %i failed\n", hotcpu);
582 			return NOTIFY_BAD;
583 		}
584 		kthread_bind(p, hotcpu);
585   		per_cpu(ksoftirqd, hotcpu) = p;
586  		break;
587 	case CPU_ONLINE:
588 		wake_up_process(per_cpu(ksoftirqd, hotcpu));
589 		break;
590 #ifdef CONFIG_HOTPLUG_CPU
591 	case CPU_UP_CANCELED:
592 		if (!per_cpu(ksoftirqd, hotcpu))
593 			break;
594 		/* Unbind so it can run.  Fall thru. */
595 		kthread_bind(per_cpu(ksoftirqd, hotcpu),
596 			     any_online_cpu(cpu_online_map));
597 	case CPU_DEAD:
598 		p = per_cpu(ksoftirqd, hotcpu);
599 		per_cpu(ksoftirqd, hotcpu) = NULL;
600 		kthread_stop(p);
601 		takeover_tasklets(hotcpu);
602 		break;
603 #endif /* CONFIG_HOTPLUG_CPU */
604  	}
605 	return NOTIFY_OK;
606 }
607 
608 static struct notifier_block __cpuinitdata cpu_nfb = {
609 	.notifier_call = cpu_callback
610 };
611 
612 __init int spawn_ksoftirqd(void)
613 {
614 	void *cpu = (void *)(long)smp_processor_id();
615 	cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
616 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
617 	register_cpu_notifier(&cpu_nfb);
618 	return 0;
619 }
620 
621 #ifdef CONFIG_SMP
622 /*
623  * Call a function on all processors (the local call runs with IRQs disabled)
624  */
625 int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
626 {
627 	int ret = 0;
628 
629 	preempt_disable();
630 	ret = smp_call_function(func, info, retry, wait);
631 	local_irq_disable();
632 	func(info);
633 	local_irq_enable();
634 	preempt_enable();
635 	return ret;
636 }
637 EXPORT_SYMBOL(on_each_cpu);
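
/*
 * Usage sketch (illustrative only; do_flush is a hypothetical helper):
 *
 *	static void do_flush(void *info)
 *	{
 *		... per-CPU work; called locally with IRQs disabled,
 *		    remotely from IPI context ...
 *	}
 *
 *	on_each_cpu(do_flush, NULL, 0, 1);
 *
 * With wait == 1 the call returns only after the function has run on
 * every online CPU.
 */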
638 #endif
639