/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>

#include <asm/irq.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if softirq is serialized, only local cpu is marked for
     execution. Hence, we get something sort of weak cpu binding.
     Though it is still not clear whether it will result in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt itself.
 */

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

/*
 * we cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static inline void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __get_cpu_var(ksoftirqd);

	if (tsk && tsk->state != TASK_RUNNING)
		wake_up_process(tsk);
}

/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
static void __local_bh_disable(unsigned long ip)
{
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(flags);
	add_preempt_count(SOFTIRQ_OFFSET);
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_off(ip);
	raw_local_irq_restore(flags);
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip)
{
	add_preempt_count(SOFTIRQ_OFFSET);
	barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */

void local_bh_disable(void)
{
	__local_bh_disable((unsigned long)__builtin_return_address(0));
}

EXPORT_SYMBOL(local_bh_disable);

void __local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());

	/*
	 * softirqs should never be enabled by __local_bh_enable(),
	 * it always nests inside local_bh_enable() sections:
	 */
	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);

	sub_preempt_count(SOFTIRQ_OFFSET);
}
EXPORT_SYMBOL_GPL(__local_bh_enable);

/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());
	WARN_ON_ONCE(!irqs_disabled());

	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on((unsigned long)__builtin_return_address(0));
	sub_preempt_count(SOFTIRQ_OFFSET);
}

EXPORT_SYMBOL(_local_bh_enable);
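
/*
 * Usage sketch (illustrative, hypothetical driver): local_bh_disable() and
 * local_bh_enable() are how process context keeps local softirq/tasklet
 * handlers away from data it is updating:
 *
 *	static void my_update_stats(struct my_dev *dev)
 *	{
 *		local_bh_disable();		- softirqs held off on this CPU
 *		dev->stats.tx_packets++;	- safe vs. the device's tasklet
 *		local_bh_enable();		- runs pending softirqs, if any
 *	}
 *
 * This only serializes against softirq execution on the local CPU; data
 * shared across CPUs still needs its own locking (see the header comment).
 */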

void local_bh_enable(void)
{
#ifdef CONFIG_TRACE_IRQFLAGS
	unsigned long flags;

	WARN_ON_ONCE(in_irq());
#endif
	WARN_ON_ONCE(irqs_disabled());

#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_save(flags);
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on((unsigned long)__builtin_return_address(0));
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	sub_preempt_count(SOFTIRQ_OFFSET - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();

	dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_restore(flags);
#endif
	preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable);

void local_bh_enable_ip(unsigned long ip)
{
#ifdef CONFIG_TRACE_IRQFLAGS
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	local_irq_save(flags);
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	sub_preempt_count(SOFTIRQ_OFFSET - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();

	dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_restore(flags);
#endif
	preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable_ip);

/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to ksoftirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

asmlinkage void __do_softirq(void)
{
	struct softirq_action *h;
	__u32 pending;
	int max_restart = MAX_SOFTIRQ_RESTART;
	int cpu;

	pending = local_softirq_pending();
	account_system_vtime(current);

	__local_bh_disable((unsigned long)__builtin_return_address(0));
	trace_softirq_enter();

	cpu = smp_processor_id();
restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	do {
		if (pending & 1) {
			h->action(h);
			rcu_bh_qsctr_inc(cpu);
		}
		h++;
		pending >>= 1;
	} while (pending);

	local_irq_disable();

	pending = local_softirq_pending();
	if (pending && --max_restart)
		goto restart;

	if (pending)
		wakeup_softirqd();

	trace_softirq_exit();

	account_system_vtime(current);
	_local_bh_enable();
}

#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		__do_softirq();

	local_irq_restore(flags);
}

EXPORT_SYMBOL(do_softirq);

#endif

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()	__do_softirq()
#else
# define invoke_softirq()	do_softirq()
#endif
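
/*
 * Illustrative summary of the processing paths defined above and below:
 *
 *   - On return from a hard interrupt, irq_exit() calls invoke_softirq()
 *     (one of the two variants selected just above).
 *   - In process context, local_bh_enable() calls do_softirq() when softirqs
 *     are pending and we are not already in interrupt context.
 *   - raise_softirq() from process context, and __do_softirq() running out of
 *     MAX_SOFTIRQ_RESTART passes, wake ksoftirqd, which then loops over
 *     do_softirq() from its own per-CPU thread.
 */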

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
	account_system_vtime(current);
	trace_hardirq_exit();
	sub_preempt_count(IRQ_EXIT_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();
	preempt_enable_no_resched();
}

/*
 * This function must run with irqs disabled!
 */
inline fastcall void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}

EXPORT_SYMBOL(raise_softirq_irqoff);

void fastcall raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
{
	softirq_vec[nr].data = data;
	softirq_vec[nr].action = action;
}

/* Tasklets */
struct tasklet_head
{
	struct tasklet_struct *list;
};

/* Some compilers disobey section attribute on statics when not
   initialized -- RR */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };

void fastcall __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = __get_cpu_var(tasklet_vec).list;
	__get_cpu_var(tasklet_vec).list = t;
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = __get_cpu_var(tasklet_hi_vec).list;
	__get_cpu_var(tasklet_hi_vec).list = t;
	raise_softirq_irqoff(HI_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

static void tasklet_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_vec).list;
	__get_cpu_var(tasklet_vec).list = NULL;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = __get_cpu_var(tasklet_vec).list;
		__get_cpu_var(tasklet_vec).list = t;
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
		local_irq_enable();
	}
}

static void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_hi_vec).list;
	__get_cpu_var(tasklet_hi_vec).list = NULL;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = __get_cpu_var(tasklet_hi_vec).list;
		__get_cpu_var(tasklet_hi_vec).list = t;
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}
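
/*
 * Usage sketch (illustrative, hypothetical driver): deferring work from an
 * interrupt handler to a tasklet, using tasklet_init()/tasklet_kill() below
 * and the tasklet_schedule() wrapper from <linux/interrupt.h>:
 *
 *	static void my_tasklet_fn(unsigned long data)
 *	{
 *		struct my_dev *dev = (struct my_dev *)data;
 *		...				- runs in softirq context
 *	}
 *
 *	tasklet_init(&dev->tasklet, my_tasklet_fn, (unsigned long)dev);
 *	tasklet_schedule(&dev->tasklet);	- typically from the irq handler
 *	tasklet_kill(&dev->tasklet);		- on teardown, process context
 *
 * The same tasklet never runs on two CPUs at once (tasklet_trylock() above),
 * and scheduling it while it is already pending is a no-op
 * (TASKLET_STATE_SCHED).
 */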

void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->data = data;
}

EXPORT_SYMBOL(tasklet_init);

void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		printk("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		do
			yield();
		while (test_bit(TASKLET_STATE_SCHED, &t->state));
	}
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);
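
/*
 * Registration sketch: softirq_init() below installs the two tasklet
 * handlers; other subsystems claim their softirq_vec slots the same way
 * during boot, e.g. the networking core (illustrative):
 *
 *	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
 *	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
 *
 * There is no "close_softirq"; the entry is set once and the handler is then
 * run whenever its bit is raised via raise_softirq()/raise_softirq_irqoff().
 */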

void __init softirq_init(void)
{
	open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
}

static int ksoftirqd(void * __bind_cpu)
{
	set_user_nice(current, 19);
	current->flags |= PF_NOFREEZE;

	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop()) {
		preempt_disable();
		if (!local_softirq_pending()) {
			preempt_enable_no_resched();
			schedule();
			preempt_disable();
		}

		__set_current_state(TASK_RUNNING);

		while (local_softirq_pending()) {
			/* Preempt disable stops cpu going offline.
			   If already offline, we'll be on wrong CPU:
			   don't process */
			if (cpu_is_offline((long)__bind_cpu))
				goto wait_to_die;
			do_softirq();
			preempt_enable_no_resched();
			cond_resched();
			preempt_disable();
		}
		preempt_enable();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;

wait_to_die:
	preempt_enable();
	/* Wait for kthread_stop */
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
		if (*i == t) {
			*i = t->next;
			return;
		}
	}
	BUG();
}

static void takeover_tasklets(unsigned int cpu)
{
	struct tasklet_struct **i;

	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
	*i = per_cpu(tasklet_vec, cpu).list;
	per_cpu(tasklet_vec, cpu).list = NULL;
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
	*i = per_cpu(tasklet_hi_vec, cpu).list;
	per_cpu(tasklet_hi_vec, cpu).list = NULL;
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __cpuinit cpu_callback(struct notifier_block *nfb,
				  unsigned long action,
				  void *hcpu)
{
	int hotcpu = (unsigned long)hcpu;
	struct task_struct *p;

	switch (action) {
	case CPU_UP_PREPARE:
		BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
		BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
		if (IS_ERR(p)) {
			printk("ksoftirqd for %i failed\n", hotcpu);
			return NOTIFY_BAD;
		}
		kthread_bind(p, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = p;
		break;
	case CPU_ONLINE:
		wake_up_process(per_cpu(ksoftirqd, hotcpu));
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
		if (!per_cpu(ksoftirqd, hotcpu))
			break;
		/* Unbind so it can run. Fall thru. */
		kthread_bind(per_cpu(ksoftirqd, hotcpu),
			     any_online_cpu(cpu_online_map));
	case CPU_DEAD:
		p = per_cpu(ksoftirqd, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = NULL;
		kthread_stop(p);
		takeover_tasklets(hotcpu);
		break;
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cpu_nfb = {
	.notifier_call = cpu_callback
};

__init int spawn_ksoftirqd(void)
{
	void *cpu = (void *)(long)smp_processor_id();
	cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
	register_cpu_notifier(&cpu_nfb);
	return 0;
}

#ifdef CONFIG_SMP
/*
 * Call a function on all processors
 */
int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
{
	int ret = 0;

	preempt_disable();
	ret = smp_call_function(func, info, retry, wait);
	local_irq_disable();
	func(info);
	local_irq_enable();
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
#endif
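
/*
 * Usage sketch for on_each_cpu() (illustrative, hypothetical caller): run a
 * short function on every online CPU and wait for completion:
 *
 *	static void my_flush_local_state(void *info)
 *	{
 *		...		- interrupts are disabled while this runs
 *	}
 *
 *	on_each_cpu(my_flush_local_state, NULL, 0, 1);
 *
 * The function must not sleep: on the calling CPU it runs with interrupts
 * disabled, and on the other CPUs it runs from the smp_call_function() IPI.
 */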