/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 *	Distribute under GPLv2.
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */
#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/tick.h>

#include <asm/irq.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks (see the illustrative sketch below this comment).
   - Even if softirq is serialized, only local cpu is marked for
     execution. Hence, we get something sort of weak cpu binding.
     Though it is still not clear whether it will result in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt itself.
 */
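/*
 * Illustrative sketch (not part of the original file): what "let it
 * serialize itself" means in practice.  A softirq handler that needs
 * serialization takes its own lock around its own shared state; the
 * softirq core imposes no global serialization.  All names below
 * (my_lock, my_queue, my_softirq_action) are hypothetical.
 *
 *	static DEFINE_SPINLOCK(my_lock);
 *	static LIST_HEAD(my_queue);
 *
 *	static void my_softirq_action(struct softirq_action *h)
 *	{
 *		spin_lock(&my_lock);
 *		while (!list_empty(&my_queue))
 *			list_del_init(my_queue.next);
 *		spin_unlock(&my_lock);
 *	}
 */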
#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
/*
 * we cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static inline void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __get_cpu_var(ksoftirqd);

	if (tsk && tsk->state != TASK_RUNNING)
		wake_up_process(tsk);
}
/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
static void __local_bh_disable(unsigned long ip)
{
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(flags);
	add_preempt_count(SOFTIRQ_OFFSET);
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_off(ip);
	raw_local_irq_restore(flags);
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip)
{
	add_preempt_count(SOFTIRQ_OFFSET);
	barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */
void local_bh_disable(void)
{
	__local_bh_disable((unsigned long)__builtin_return_address(0));
}

EXPORT_SYMBOL(local_bh_disable);
void __local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());

	/*
	 * softirqs should never be enabled by __local_bh_enable(),
	 * it always nests inside local_bh_enable() sections:
	 */
	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);

	sub_preempt_count(SOFTIRQ_OFFSET);
}
EXPORT_SYMBOL_GPL(__local_bh_enable);
/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());
	WARN_ON_ONCE(!irqs_disabled());

	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on((unsigned long)__builtin_return_address(0));
	sub_preempt_count(SOFTIRQ_OFFSET);
}

EXPORT_SYMBOL(_local_bh_enable);
static inline void _local_bh_enable_ip(unsigned long ip)
{
	WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	sub_preempt_count(SOFTIRQ_OFFSET - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();

	dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}
void local_bh_enable(void)
{
	_local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(local_bh_enable);

void local_bh_enable_ip(unsigned long ip)
{
	_local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);
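/*
 * Illustrative sketch (not part of the original file): the typical way
 * driver code uses the local_bh_disable()/local_bh_enable() pair above to
 * keep its own softirq/tasklet handler off the local CPU while touching
 * data they share.  The names my_lock and my_stats are hypothetical.
 *
 *	static DEFINE_SPINLOCK(my_lock);
 *	static unsigned long my_stats;
 *
 *	static void my_update_from_process_context(void)
 *	{
 *		local_bh_disable();
 *		spin_lock(&my_lock);
 *		my_stats++;
 *		spin_unlock(&my_lock);
 *		local_bh_enable();
 *	}
 *
 * spin_lock_bh() combines the two steps; note that local_bh_enable() also
 * runs any softirqs that were raised while they were disabled.
 */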
/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to softirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency against fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10
asmlinkage void __do_softirq(void)
{
	struct softirq_action *h;
	__u32 pending;
	int max_restart = MAX_SOFTIRQ_RESTART;
	int cpu;

	pending = local_softirq_pending();
	account_system_vtime(current);

	__local_bh_disable((unsigned long)__builtin_return_address(0));
	trace_softirq_enter();

	cpu = smp_processor_id();
restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	do {
		if (pending & 1) {
			int prev_count = preempt_count();

			h->action(h);

			if (unlikely(prev_count != preempt_count())) {
				printk(KERN_ERR "huh, entered softirq %td %p "
				       "with preempt_count %08x, "
				       "exited with %08x?\n", h - softirq_vec,
				       h->action, prev_count, preempt_count());
				preempt_count() = prev_count;
			}

			rcu_bh_qsctr_inc(cpu);
		}
		h++;
		pending >>= 1;
	} while (pending);

	local_irq_disable();

	pending = local_softirq_pending();
	if (pending && --max_restart)
		goto restart;

	if (pending)
		wakeup_softirqd();

	trace_softirq_exit();

	account_system_vtime(current);
	_local_bh_enable();
}
#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		__do_softirq();

	local_irq_restore(flags);
}

#endif
/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
	int cpu = smp_processor_id();

	if (idle_cpu(cpu) && !in_interrupt())
		tick_check_idle(cpu);

	__irq_enter();
}

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()	__do_softirq()
#else
# define invoke_softirq()	do_softirq()
#endif
/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
	account_system_vtime(current);
	trace_hardirq_exit();
	sub_preempt_count(IRQ_EXIT_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

#ifdef CONFIG_NO_HZ
	/* Make sure that timer wheel updates are propagated */
	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
		tick_nohz_stop_sched_tick(0);
	rcu_irq_exit();
#endif
	preempt_enable_no_resched();
}
/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}

void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}
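/*
 * Illustrative sketch (not part of the original file): how a subsystem
 * registers and raises a softirq using the helpers above.  MY_SOFTIRQ and
 * my_softirq_action are hypothetical; real users are the fixed entries of
 * the softirq enum in <linux/interrupt.h> (e.g. NET_TX_SOFTIRQ).
 *
 *	static void my_softirq_action(struct softirq_action *h)
 *	{
 *		do_my_percpu_work();	hypothetical; runs with irqs enabled
 *	}
 *
 *	static int __init my_init(void)
 *	{
 *		open_softirq(MY_SOFTIRQ, my_softirq_action);
 *		return 0;
 *	}
 *
 * Later, typically from an interrupt handler:
 *	raise_softirq(MY_SOFTIRQ);
 * or, if interrupts are already disabled:
 *	raise_softirq_irqoff(MY_SOFTIRQ);
 */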
/* Tasklets */
struct tasklet_head
{
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
void __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__get_cpu_var(tasklet_vec).tail = t;
	__get_cpu_var(tasklet_vec).tail = &(t->next);
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);
void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__get_cpu_var(tasklet_hi_vec).tail = t;
	__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
	raise_softirq_irqoff(HI_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);
static void tasklet_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_vec).head;
	__get_cpu_var(tasklet_vec).head = NULL;
	__get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__get_cpu_var(tasklet_vec).tail = t;
		__get_cpu_var(tasklet_vec).tail = &(t->next);
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
		local_irq_enable();
	}
}
static void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_hi_vec).head;
	__get_cpu_var(tasklet_hi_vec).head = NULL;
	__get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__get_cpu_var(tasklet_hi_vec).tail = t;
		__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}
void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->data = data;
}

EXPORT_SYMBOL(tasklet_init);
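/*
 * Illustrative sketch (not part of the original file): the usual tasklet
 * life cycle built on tasklet_init()/tasklet_schedule()/tasklet_kill().
 * my_tasklet, my_handler and my_dev are hypothetical names.
 *
 *	static void my_handler(unsigned long data)
 *	{
 *		struct my_dev *dev = (struct my_dev *)data;
 *		...	runs in softirq context, serialized wrt itself
 *	}
 *
 *	static struct tasklet_struct my_tasklet;
 *
 *	tasklet_init(&my_tasklet, my_handler, (unsigned long)my_dev);
 *	tasklet_schedule(&my_tasklet);	typically from the irq handler
 *	tasklet_kill(&my_tasklet);	on teardown, before freeing my_dev
 */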
void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		printk("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		do
			yield();
		while (test_bit(TASKLET_STATE_SCHED, &t->state));
	}
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);
void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
	}

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
static int ksoftirqd(void * __bind_cpu)
{
	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop()) {
		preempt_disable();
		if (!local_softirq_pending()) {
			preempt_enable_no_resched();
			schedule();
			preempt_disable();
		}

		__set_current_state(TASK_RUNNING);

		while (local_softirq_pending()) {
			/* Preempt disable stops cpu going offline.
			   If already offline, we'll be on wrong CPU:
			   don't process */
			if (cpu_is_offline((long)__bind_cpu))
				goto wait_to_die;
			do_softirq();
			preempt_enable_no_resched();
			cond_resched();
			preempt_disable();
		}
		preempt_enable();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;

wait_to_die:
	preempt_enable();
	/* Wait for kthread_stop */
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
		if (*i == t) {
			*i = t->next;
			/* If this was the tail element, move the tail ptr */
			if (*i == NULL)
				per_cpu(tasklet_vec, cpu).tail = i;
			return;
		}
	}
	BUG();
}
static void takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
		__get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
		__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */
static int __cpuinit cpu_callback(struct notifier_block *nfb,
				  unsigned long action,
				  void *hcpu)
{
	int hotcpu = (unsigned long)hcpu;
	struct task_struct *p;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
		if (IS_ERR(p)) {
			printk("ksoftirqd for %i failed\n", hotcpu);
			return NOTIFY_BAD;
		}
		kthread_bind(p, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = p;
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		wake_up_process(per_cpu(ksoftirqd, hotcpu));
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		if (!per_cpu(ksoftirqd, hotcpu))
			break;
		/* Unbind so it can run. Fall thru. */
		kthread_bind(per_cpu(ksoftirqd, hotcpu),
			     any_online_cpu(cpu_online_map));
	case CPU_DEAD:
	case CPU_DEAD_FROZEN: {
		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

		p = per_cpu(ksoftirqd, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = NULL;
		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
		kthread_stop(p);
		takeover_tasklets(hotcpu);
		break;
	}
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata cpu_nfb = {
	.notifier_call = cpu_callback
};

static __init int spawn_ksoftirqd(void)
{
	void *cpu = (void *)(long)smp_processor_id();
	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);

	BUG_ON(err == NOTIFY_BAD);
	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
	register_cpu_notifier(&cpu_nfb);
	return 0;
}
early_initcall(spawn_ksoftirqd);
#ifdef CONFIG_SMP
/*
 * Call a function on all processors
 */
int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
	int ret = 0;

	preempt_disable();
	ret = smp_call_function(func, info, wait);
	local_irq_disable();
	func(info);
	local_irq_enable();
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
#endif
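/*
 * Illustrative sketch (not part of the original file): on_each_cpu() runs
 * the callback on every online CPU, including the caller's, and the
 * callback runs with interrupts disabled.  my_flush_local is a
 * hypothetical name.
 *
 *	static void my_flush_local(void *info)
 *	{
 *		flush_this_cpus_cache(info);	hypothetical per-CPU work
 *	}
 *
 *	on_each_cpu(my_flush_local, NULL, 1);	wait=1: return after all CPUs ran it
 */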