xref: /linux/kernel/softirq.c (revision 14b42963f64b98ab61fa9723c03d71aa5ef4f862)
1 /*
2  *	linux/kernel/softirq.c
3  *
4  *	Copyright (C) 1992 Linus Torvalds
5  *
6  * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
7  */
8 
9 #include <linux/module.h>
10 #include <linux/kernel_stat.h>
11 #include <linux/interrupt.h>
12 #include <linux/init.h>
13 #include <linux/mm.h>
14 #include <linux/notifier.h>
15 #include <linux/percpu.h>
16 #include <linux/cpu.h>
17 #include <linux/kthread.h>
18 #include <linux/rcupdate.h>
19 #include <linux/smp.h>
20 
21 #include <asm/irq.h>
22 /*
23    - No shared variables, all the data are CPU local.
24    - If a softirq needs serialization, let it serialize itself
25      by its own spinlocks.
26    - Even if a softirq is serialized, only the local cpu is marked for
27      execution. Hence, we get a sort of weak cpu binding. It is still
28      not clear whether this will result in better locality
29      or not.
30 
31    Examples:
32    - NET RX softirq. It is multithreaded and does not require
33      any global serialization.
34    - NET TX softirq. It kicks software netdevice queues, hence
35      it is logically serialized per device, but this serialization
36      is invisible to common code.
37    - Tasklets: each tasklet is serialized with respect to itself.
38  */
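/*
 * A minimal illustration of the rules above, using the hypothetical
 * names "my_handler" and "my_tasklet": scheduling the same tasklet
 * from several CPUs never runs its handler concurrently with itself,
 * while two different tasklets may run in parallel, one per CPU.
 *
 *	static void my_handler(unsigned long data)
 *	{
 *		... never runs concurrently with itself ...
 *	}
 *	static DECLARE_TASKLET(my_tasklet, my_handler, 0);
 *
 *	tasklet_schedule(&my_tasklet);	runs later in softirq context,
 *					normally on the CPU that
 *					scheduled it
 */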
39 
40 #ifndef __ARCH_IRQ_STAT
41 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
42 EXPORT_SYMBOL(irq_stat);
43 #endif
44 
45 static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
46 
47 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
48 
49 /*
50  * We cannot loop indefinitely here to avoid userspace starvation,
51  * but we also don't want to introduce a worst-case 1/HZ latency
52  * for the pending events, so let the scheduler balance
53  * the softirq load for us.
54  */
55 static inline void wakeup_softirqd(void)
56 {
57 	/* Interrupts are disabled: no need to stop preemption */
58 	struct task_struct *tsk = __get_cpu_var(ksoftirqd);
59 
60 	if (tsk && tsk->state != TASK_RUNNING)
61 		wake_up_process(tsk);
62 }
63 
64 /*
65  * This one is for softirq.c-internal use,
66  * where hardirqs are disabled legitimately:
67  */
68 static void __local_bh_disable(unsigned long ip)
69 {
70 	unsigned long flags;
71 
72 	WARN_ON_ONCE(in_irq());
73 
74 	raw_local_irq_save(flags);
75 	add_preempt_count(SOFTIRQ_OFFSET);
76 	/*
77 	 * Were softirqs turned off above:
78 	 */
79 	if (softirq_count() == SOFTIRQ_OFFSET)
80 		trace_softirqs_off(ip);
81 	raw_local_irq_restore(flags);
82 }
83 
84 void local_bh_disable(void)
85 {
86 	__local_bh_disable((unsigned long)__builtin_return_address(0));
87 }
88 
89 EXPORT_SYMBOL(local_bh_disable);
90 
91 void __local_bh_enable(void)
92 {
93 	WARN_ON_ONCE(in_irq());
94 
95 	/*
96 	 * Softirqs should never be enabled by __local_bh_enable();
97 	 * it always nests inside local_bh_enable() sections:
98 	 */
99 	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
100 
101 	sub_preempt_count(SOFTIRQ_OFFSET);
102 }
103 EXPORT_SYMBOL_GPL(__local_bh_enable);
104 
105 /*
106  * Special case: softirqs can safely be enabled in
107  * cond_resched_softirq(), or by __do_softirq(),
108  * without processing still-pending softirqs:
109  */
110 void _local_bh_enable(void)
111 {
112 	WARN_ON_ONCE(in_irq());
113 	WARN_ON_ONCE(!irqs_disabled());
114 
115 	if (softirq_count() == SOFTIRQ_OFFSET)
116 		trace_softirqs_on((unsigned long)__builtin_return_address(0));
117 	sub_preempt_count(SOFTIRQ_OFFSET);
118 }
119 
120 EXPORT_SYMBOL(_local_bh_enable);
121 
122 void local_bh_enable(void)
123 {
124 	unsigned long flags;
125 
126 	WARN_ON_ONCE(in_irq());
127 	WARN_ON_ONCE(irqs_disabled());
128 
129 	local_irq_save(flags);
130 	/*
131 	 * Are softirqs going to be turned on now:
132 	 */
133 	if (softirq_count() == SOFTIRQ_OFFSET)
134 		trace_softirqs_on((unsigned long)__builtin_return_address(0));
135 	/*
136 	 * Keep preemption disabled until we are done with
137 	 * softirq processing:
138  	 */
139  	sub_preempt_count(SOFTIRQ_OFFSET - 1);
140 
141 	if (unlikely(!in_interrupt() && local_softirq_pending()))
142 		do_softirq();
143 
144 	dec_preempt_count();
145 	local_irq_restore(flags);
146 	preempt_check_resched();
147 }
148 EXPORT_SYMBOL(local_bh_enable);
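/*
 * A minimal usage sketch for the pair above, assuming a hypothetical
 * per-cpu counter, DEFINE_PER_CPU(int, my_counter), that is also
 * updated by a softirq or tasklet handler on this cpu:
 *
 *	local_bh_disable();
 *	__get_cpu_var(my_counter)++;	softirqs cannot run here
 *	local_bh_enable();		pending softirqs may run now
 *
 * If the data is also touched from hardirq context this is not enough;
 * local_irq_save() or a spinlock is needed.  Code that shares a lock
 * with softirq context normally uses spin_lock_bh(), which is built on
 * local_bh_disable().
 */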
149 
150 void local_bh_enable_ip(unsigned long ip)
151 {
152 	unsigned long flags;
153 
154 	WARN_ON_ONCE(in_irq());
155 
156 	local_irq_save(flags);
157 	/*
158 	 * Are softirqs going to be turned on now:
159 	 */
160 	if (softirq_count() == SOFTIRQ_OFFSET)
161 		trace_softirqs_on(ip);
162 	/*
163 	 * Keep preemption disabled until we are done with
164 	 * softirq processing:
165  	 */
166  	sub_preempt_count(SOFTIRQ_OFFSET - 1);
167 
168 	if (unlikely(!in_interrupt() && local_softirq_pending()))
169 		do_softirq();
170 
171 	dec_preempt_count();
172 	local_irq_restore(flags);
173 	preempt_check_resched();
174 }
175 EXPORT_SYMBOL(local_bh_enable_ip);
176 
177 /*
178  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
179  * and we fall back to ksoftirqd after that.
180  *
181  * This number has been established via experimentation.
182  * The two things to balance are latency and fairness -
183  * we want to handle softirqs as soon as possible, but they
184  * should not be able to lock up the box.
185  */
186 #define MAX_SOFTIRQ_RESTART 10
187 
188 asmlinkage void __do_softirq(void)
189 {
190 	struct softirq_action *h;
191 	__u32 pending;
192 	int max_restart = MAX_SOFTIRQ_RESTART;
193 	int cpu;
194 
195 	pending = local_softirq_pending();
196 	account_system_vtime(current);
197 
198 	__local_bh_disable((unsigned long)__builtin_return_address(0));
199 	trace_softirq_enter();
200 
201 	cpu = smp_processor_id();
202 restart:
203 	/* Reset the pending bitmask before enabling irqs */
204 	set_softirq_pending(0);
205 
206 	local_irq_enable();
207 
208 	h = softirq_vec;
209 
210 	do {
211 		if (pending & 1) {
212 			h->action(h);
213 			rcu_bh_qsctr_inc(cpu);
214 		}
215 		h++;
216 		pending >>= 1;
217 	} while (pending);
218 
219 	local_irq_disable();
220 
221 	pending = local_softirq_pending();
222 	if (pending && --max_restart)
223 		goto restart;
224 
225 	if (pending)
226 		wakeup_softirqd();
227 
228 	trace_softirq_exit();
229 
230 	account_system_vtime(current);
231 	_local_bh_enable();
232 }
233 
234 #ifndef __ARCH_HAS_DO_SOFTIRQ
235 
236 asmlinkage void do_softirq(void)
237 {
238 	__u32 pending;
239 	unsigned long flags;
240 
241 	if (in_interrupt())
242 		return;
243 
244 	local_irq_save(flags);
245 
246 	pending = local_softirq_pending();
247 
248 	if (pending)
249 		__do_softirq();
250 
251 	local_irq_restore(flags);
252 }
253 
254 EXPORT_SYMBOL(do_softirq);
255 
256 #endif
257 
258 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
259 # define invoke_softirq()	__do_softirq()
260 #else
261 # define invoke_softirq()	do_softirq()
262 #endif
263 
264 /*
265  * Exit an interrupt context. Process softirqs if needed and possible:
266  */
267 void irq_exit(void)
268 {
269 	account_system_vtime(current);
270 	trace_hardirq_exit();
271 	sub_preempt_count(IRQ_EXIT_OFFSET);
272 	if (!in_interrupt() && local_softirq_pending())
273 		invoke_softirq();
274 	preempt_enable_no_resched();
275 }
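/*
 * For context, a sketch of where irq_exit() sits: architecture
 * interrupt entry code brackets the hardirq handler roughly like
 *
 *	irq_enter();
 *	__do_IRQ(irq, regs);		or an arch-specific handler
 *	irq_exit();			pending softirqs raised by the
 *					handler are run here, via
 *					invoke_softirq()
 *
 * The exact call chain is architecture specific; see each arch's
 * do_IRQ() implementation.
 */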
276 
277 /*
278  * This function must run with irqs disabled!
279  */
280 inline fastcall void raise_softirq_irqoff(unsigned int nr)
281 {
282 	__raise_softirq_irqoff(nr);
283 
284 	/*
285 	 * If we're in an interrupt or softirq, we're done
286 	 * (this also catches softirq-disabled code). We will
287 	 * actually run the softirq once we return from
288 	 * the irq or softirq.
289 	 *
290 	 * Otherwise we wake up ksoftirqd to make sure we
291 	 * schedule the softirq soon.
292 	 */
293 	if (!in_interrupt())
294 		wakeup_softirqd();
295 }
296 
297 EXPORT_SYMBOL(raise_softirq_irqoff);
298 
299 void fastcall raise_softirq(unsigned int nr)
300 {
301 	unsigned long flags;
302 
303 	local_irq_save(flags);
304 	raise_softirq_irqoff(nr);
305 	local_irq_restore(flags);
306 }
307 
308 void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
309 {
310 	softirq_vec[nr].data = data;
311 	softirq_vec[nr].action = action;
312 }
313 
314 EXPORT_UNUSED_SYMBOL(open_softirq);  /*  June 2006  */
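/*
 * A usage sketch: softirq numbers come from the fixed enum in
 * <linux/interrupt.h>; a subsystem installs its handler once at boot
 * and then raises the softirq whenever it has work, roughly the way
 * the networking core does:
 *
 *	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
 *	...
 *	raise_softirq(NET_TX_SOFTIRQ);		from any context
 *	raise_softirq_irqoff(NET_TX_SOFTIRQ);	if irqs are already off
 *
 * New softirq numbers are not added lightly; drivers normally use
 * tasklets (below) rather than a softirq of their own.
 */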
315 
316 /* Tasklets */
317 struct tasklet_head
318 {
319 	struct tasklet_struct *list;
320 };
321 
322 /* Some compilers disobey the section attribute on statics when not
323    initialized -- RR */
324 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
325 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
326 
327 void fastcall __tasklet_schedule(struct tasklet_struct *t)
328 {
329 	unsigned long flags;
330 
331 	local_irq_save(flags);
332 	t->next = __get_cpu_var(tasklet_vec).list;
333 	__get_cpu_var(tasklet_vec).list = t;
334 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
335 	local_irq_restore(flags);
336 }
337 
338 EXPORT_SYMBOL(__tasklet_schedule);
339 
340 void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
341 {
342 	unsigned long flags;
343 
344 	local_irq_save(flags);
345 	t->next = __get_cpu_var(tasklet_hi_vec).list;
346 	__get_cpu_var(tasklet_hi_vec).list = t;
347 	raise_softirq_irqoff(HI_SOFTIRQ);
348 	local_irq_restore(flags);
349 }
350 
351 EXPORT_SYMBOL(__tasklet_hi_schedule);
352 
353 static void tasklet_action(struct softirq_action *a)
354 {
355 	struct tasklet_struct *list;
356 
357 	local_irq_disable();
358 	list = __get_cpu_var(tasklet_vec).list;
359 	__get_cpu_var(tasklet_vec).list = NULL;
360 	local_irq_enable();
361 
362 	while (list) {
363 		struct tasklet_struct *t = list;
364 
365 		list = list->next;
366 
367 		if (tasklet_trylock(t)) {
368 			if (!atomic_read(&t->count)) {
369 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
370 					BUG();
371 				t->func(t->data);
372 				tasklet_unlock(t);
373 				continue;
374 			}
375 			tasklet_unlock(t);
376 		}
377 
378 		local_irq_disable();
379 		t->next = __get_cpu_var(tasklet_vec).list;
380 		__get_cpu_var(tasklet_vec).list = t;
381 		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
382 		local_irq_enable();
383 	}
384 }
385 
386 static void tasklet_hi_action(struct softirq_action *a)
387 {
388 	struct tasklet_struct *list;
389 
390 	local_irq_disable();
391 	list = __get_cpu_var(tasklet_hi_vec).list;
392 	__get_cpu_var(tasklet_hi_vec).list = NULL;
393 	local_irq_enable();
394 
395 	while (list) {
396 		struct tasklet_struct *t = list;
397 
398 		list = list->next;
399 
400 		if (tasklet_trylock(t)) {
401 			if (!atomic_read(&t->count)) {
402 				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
403 					BUG();
404 				t->func(t->data);
405 				tasklet_unlock(t);
406 				continue;
407 			}
408 			tasklet_unlock(t);
409 		}
410 
411 		local_irq_disable();
412 		t->next = __get_cpu_var(tasklet_hi_vec).list;
413 		__get_cpu_var(tasklet_hi_vec).list = t;
414 		__raise_softirq_irqoff(HI_SOFTIRQ);
415 		local_irq_enable();
416 	}
417 }
418 
419 
420 void tasklet_init(struct tasklet_struct *t,
421 		  void (*func)(unsigned long), unsigned long data)
422 {
423 	t->next = NULL;
424 	t->state = 0;
425 	atomic_set(&t->count, 0);
426 	t->func = func;
427 	t->data = data;
428 }
429 
430 EXPORT_SYMBOL(tasklet_init);
431 
432 void tasklet_kill(struct tasklet_struct *t)
433 {
434 	if (in_interrupt())
435 		printk("Attempt to kill tasklet from interrupt\n");
436 
437 	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
438 		do
439 			yield();
440 		while (test_bit(TASKLET_STATE_SCHED, &t->state));
441 	}
442 	tasklet_unlock_wait(t);
443 	clear_bit(TASKLET_STATE_SCHED, &t->state);
444 }
445 
446 EXPORT_SYMBOL(tasklet_kill);
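/*
 * A typical tasklet lifecycle, sketched with the hypothetical names
 * "struct my_dev", "my_tasklet_fn" and "dev":
 *
 *	static void my_tasklet_fn(unsigned long data)
 *	{
 *		struct my_dev *dev = (struct my_dev *)data;
 *		... deferred work, runs in softirq context ...
 *	}
 *
 *	tasklet_init(&dev->tasklet, my_tasklet_fn, (unsigned long)dev);
 *
 *	tasklet_schedule(&dev->tasklet);	typically from the
 *						device's irq handler
 *
 *	tasklet_kill(&dev->tasklet);		on teardown, after the
 *						irq has been freed, so
 *						it cannot be rescheduled
 */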
447 
448 void __init softirq_init(void)
449 {
450 	open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
451 	open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
452 }
453 
454 static int ksoftirqd(void * __bind_cpu)
455 {
456 	set_user_nice(current, 19);
457 	current->flags |= PF_NOFREEZE;
458 
459 	set_current_state(TASK_INTERRUPTIBLE);
460 
461 	while (!kthread_should_stop()) {
462 		preempt_disable();
463 		if (!local_softirq_pending()) {
464 			preempt_enable_no_resched();
465 			schedule();
466 			preempt_disable();
467 		}
468 
469 		__set_current_state(TASK_RUNNING);
470 
471 		while (local_softirq_pending()) {
472 			/* Disabling preemption stops the cpu from going
473 			   offline.  If it is already offline, we are on
474 			   the wrong CPU: don't process. */
475 			if (cpu_is_offline((long)__bind_cpu))
476 				goto wait_to_die;
477 			do_softirq();
478 			preempt_enable_no_resched();
479 			cond_resched();
480 			preempt_disable();
481 		}
482 		preempt_enable();
483 		set_current_state(TASK_INTERRUPTIBLE);
484 	}
485 	__set_current_state(TASK_RUNNING);
486 	return 0;
487 
488 wait_to_die:
489 	preempt_enable();
490 	/* Wait for kthread_stop */
491 	set_current_state(TASK_INTERRUPTIBLE);
492 	while (!kthread_should_stop()) {
493 		schedule();
494 		set_current_state(TASK_INTERRUPTIBLE);
495 	}
496 	__set_current_state(TASK_RUNNING);
497 	return 0;
498 }
499 
500 #ifdef CONFIG_HOTPLUG_CPU
501 /*
502  * tasklet_kill_immediate is called to remove a tasklet which may already be
503  * scheduled for execution on @cpu.
504  *
505  * Unlike tasklet_kill, this function removes the tasklet
506  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
507  *
508  * When this function is called, @cpu must be in the CPU_DEAD state.
509  */
510 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
511 {
512 	struct tasklet_struct **i;
513 
514 	BUG_ON(cpu_online(cpu));
515 	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
516 
517 	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
518 		return;
519 
520 	/* CPU is dead, so no lock needed. */
521 	for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
522 		if (*i == t) {
523 			*i = t->next;
524 			return;
525 		}
526 	}
527 	BUG();
528 }
529 
530 static void takeover_tasklets(unsigned int cpu)
531 {
532 	struct tasklet_struct **i;
533 
534 	/* CPU is dead, so no lock needed. */
535 	local_irq_disable();
536 
537 	/* Find end, append list for that CPU. */
538 	for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
539 	*i = per_cpu(tasklet_vec, cpu).list;
540 	per_cpu(tasklet_vec, cpu).list = NULL;
541 	raise_softirq_irqoff(TASKLET_SOFTIRQ);
542 
543 	for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
544 	*i = per_cpu(tasklet_hi_vec, cpu).list;
545 	per_cpu(tasklet_hi_vec, cpu).list = NULL;
546 	raise_softirq_irqoff(HI_SOFTIRQ);
547 
548 	local_irq_enable();
549 }
550 #endif /* CONFIG_HOTPLUG_CPU */
551 
552 static int __devinit cpu_callback(struct notifier_block *nfb,
553 				  unsigned long action,
554 				  void *hcpu)
555 {
556 	int hotcpu = (unsigned long)hcpu;
557 	struct task_struct *p;
558 
559 	switch (action) {
560 	case CPU_UP_PREPARE:
561 		BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
562 		BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
563 		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
564 		if (IS_ERR(p)) {
565 			printk("ksoftirqd for %i failed\n", hotcpu);
566 			return NOTIFY_BAD;
567 		}
568 		kthread_bind(p, hotcpu);
569   		per_cpu(ksoftirqd, hotcpu) = p;
570  		break;
571 	case CPU_ONLINE:
572 		wake_up_process(per_cpu(ksoftirqd, hotcpu));
573 		break;
574 #ifdef CONFIG_HOTPLUG_CPU
575 	case CPU_UP_CANCELED:
576 		if (!per_cpu(ksoftirqd, hotcpu))
577 			break;
578 		/* Unbind so it can run.  Fall thru. */
579 		kthread_bind(per_cpu(ksoftirqd, hotcpu),
580 			     any_online_cpu(cpu_online_map));
581 	case CPU_DEAD:
582 		p = per_cpu(ksoftirqd, hotcpu);
583 		per_cpu(ksoftirqd, hotcpu) = NULL;
584 		kthread_stop(p);
585 		takeover_tasklets(hotcpu);
586 		break;
587 #endif /* CONFIG_HOTPLUG_CPU */
588  	}
589 	return NOTIFY_OK;
590 }
591 
592 static struct notifier_block __devinitdata cpu_nfb = {
593 	.notifier_call = cpu_callback
594 };
595 
596 __init int spawn_ksoftirqd(void)
597 {
598 	void *cpu = (void *)(long)smp_processor_id();
599 	cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
600 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
601 	register_cpu_notifier(&cpu_nfb);
602 	return 0;
603 }
604 
605 #ifdef CONFIG_SMP
606 /*
607  * Call a function on all processors
608  */
609 int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
610 {
611 	int ret = 0;
612 
613 	preempt_disable();
614 	ret = smp_call_function(func, info, retry, wait);
615 	local_irq_disable();
616 	func(info);
617 	local_irq_enable();
618 	preempt_enable();
619 	return ret;
620 }
621 EXPORT_SYMBOL(on_each_cpu);
622 #endif
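/*
 * A usage sketch for on_each_cpu(), with a hypothetical callback
 * "flush_local_state": the callback runs on every online CPU and, on
 * the local CPU, with interrupts disabled, so it must not sleep.
 *
 *	static void flush_local_state(void *unused)
 *	{
 *		... per-CPU work, no sleeping ...
 *	}
 *
 *	on_each_cpu(flush_local_state, NULL, 0, 1);
 *
 * Here the third argument is "retry" (passed on to smp_call_function())
 * and the fourth is "wait": 1 means do not return until the callback
 * has finished on all CPUs.
 */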
623