Linux Kernel  3.7.1
softirq.c
1 /*
2  * linux/kernel/softirq.c
3  *
4  * Copyright (C) 1992 Linus Torvalds
5  *
6  * Distribute under GPLv2.
7  *
8  * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
9  *
10  * Remote softirq infrastructure is by Jens Axboe.
11  */
12 
13 #include <linux/export.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/interrupt.h>
16 #include <linux/init.h>
17 #include <linux/mm.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/ftrace.h>
25 #include <linux/smp.h>
26 #include <linux/smpboot.h>
27 #include <linux/tick.h>
28 
29 #define CREATE_TRACE_POINTS
30 #include <trace/events/irq.h>
31 
32 #include <asm/irq.h>
33 /*
34  - No shared variables, all the data are CPU local.
35  - If a softirq needs serialization, let it serialize itself
36  by its own spinlocks.
37  - Even if softirq is serialized, only local cpu is marked for
38  execution. Hence, we get a sort of weak cpu binding.
39  Though it is still not clear whether this results in better
40  locality or not.
41 
42  Examples:
43  - NET RX softirq. It is multithreaded and does not require
44  any global serialization.
45  - NET TX softirq. It kicks software netdevice queues, hence
46  it is logically serialized per device, but this serialization
47  is invisible to common code.
48  - Tasklets: serialized wrt itself.
49  */
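To make the rules above concrete, here is a minimal sketch (not part of this file, with a hypothetical handler name and per-CPU list) of a softirq action that keeps all of its state CPU-local, in the style of the NET RX softirq:

static DEFINE_PER_CPU(struct list_head, example_work);	/* INIT_LIST_HEAD()ed per CPU at boot */

/* Runs on the CPU that raised the softirq, with hardirqs enabled. */
static void example_softirq_action(struct softirq_action *h)
{
	LIST_HEAD(work);

	/* Detach the CPU-local list; it is also filled from hardirq context. */
	local_irq_disable();
	list_splice_init(&__get_cpu_var(example_work), &work);
	local_irq_enable();

	while (!list_empty(&work)) {
		struct list_head *entry = work.next;

		list_del(entry);
		/* ... handle the entry; no cross-CPU locking needed ... */
	}
}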
50 
51 #ifndef __ARCH_IRQ_STAT
52 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
53 EXPORT_SYMBOL(irq_stat);
54 #endif
55 
56 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
57 
58 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
59 
61  "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
62  "TASKLET", "SCHED", "HRTIMER", "RCU"
63 };
64 
65 /*
66  * We cannot loop indefinitely here without risking userspace starvation,
67  * but we also don't want to introduce a worst-case 1/HZ latency
68  * for pending events, so let the scheduler balance
69  * the softirq load for us.
70  */
71 static void wakeup_softirqd(void)
72 {
73  /* Interrupts are disabled: no need to stop preemption */
74  struct task_struct *tsk = __this_cpu_read(ksoftirqd);
75 
76  if (tsk && tsk->state != TASK_RUNNING)
77  wake_up_process(tsk);
78 }
79 
80 /*
81  * preempt_count and SOFTIRQ_OFFSET usage:
82  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
83  * softirq processing.
84  * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
85  * on local_bh_disable or local_bh_enable.
86  * This lets us distinguish between whether we are currently processing
87  * softirq and whether we just have bh disabled.
88  */
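A short sketch (not from this file) of how these two offsets are usually read back through the in_softirq()/in_serving_softirq() helpers from the generic irq headers; the reporting function is hypothetical:

static void example_context_report(void)
{
	if (in_serving_softirq())
		/* SOFTIRQ_OFFSET is set: a softirq handler is running right now. */
		pr_debug("serving softirq\n");
	else if (in_softirq())
		/* Only SOFTIRQ_DISABLE_OFFSET counts are set: bottom halves merely disabled. */
		pr_debug("softirqs disabled, none being served\n");
}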
89 
90 /*
91  * This one is for softirq.c-internal use,
92  * where hardirqs are disabled legitimately:
93  */
94 #ifdef CONFIG_TRACE_IRQFLAGS
95 static void __local_bh_disable(unsigned long ip, unsigned int cnt)
96 {
97  unsigned long flags;
98 
99  WARN_ON_ONCE(in_irq());
100 
101  raw_local_irq_save(flags);
102  /*
103  * The preempt tracer hooks into add_preempt_count and will break
104  * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
105  * is set and before current->softirq_enabled is cleared.
106  * We must manually increment preempt_count here and manually
107  * call the trace_preempt_off later.
108  */
109  preempt_count() += cnt;
110  /*
111  * Were softirqs turned off above:
112  */
113  if (softirq_count() == cnt)
114  trace_softirqs_off(ip);
115  raw_local_irq_restore(flags);
116 
117  if (preempt_count() == cnt)
118  trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
119 }
120 #else /* !CONFIG_TRACE_IRQFLAGS */
121 static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
122 {
123  add_preempt_count(cnt);
124  barrier();
125 }
126 #endif /* CONFIG_TRACE_IRQFLAGS */
127 
128 void local_bh_disable(void)
129 {
130  __local_bh_disable((unsigned long)__builtin_return_address(0),
131  SOFTIRQ_DISABLE_OFFSET);
132 }
133 
134 EXPORT_SYMBOL(local_bh_disable);
135 
136 static void __local_bh_enable(unsigned int cnt)
137 {
138  WARN_ON_ONCE(in_irq());
139  WARN_ON_ONCE(!irqs_disabled());
140 
141  if (softirq_count() == cnt)
142  trace_softirqs_on((unsigned long)__builtin_return_address(0));
143  sub_preempt_count(cnt);
144 }
145 
146 /*
147  * Special-case - softirqs can safely be enabled in
148  * cond_resched_softirq(), or by __do_softirq(),
149  * without processing still-pending softirqs:
150  */
151 void _local_bh_enable(void)
152 {
153  __local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
154 }
155 
156 EXPORT_SYMBOL(_local_bh_enable);
157 
158 static inline void _local_bh_enable_ip(unsigned long ip)
159 {
160  WARN_ON_ONCE(in_irq() || irqs_disabled());
161 #ifdef CONFIG_TRACE_IRQFLAGS
162  local_irq_disable();
163 #endif
164  /*
165  * Are softirqs going to be turned on now:
166  */
167  if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
168  trace_softirqs_on(ip);
169  /*
170  * Keep preemption disabled until we are done with
171  * softirq processing:
172  */
173  sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1);
174 
175  if (unlikely(!in_interrupt() && local_softirq_pending()))
176  do_softirq();
177 
178  dec_preempt_count();
179 #ifdef CONFIG_TRACE_IRQFLAGS
180  local_irq_enable();
181 #endif
182  preempt_check_resched();
183 }
184 
185 void local_bh_enable(void)
186 {
187  _local_bh_enable_ip((unsigned long)__builtin_return_address(0));
188 }
189 EXPORT_SYMBOL(local_bh_enable);
190 
191 void local_bh_enable_ip(unsigned long ip)
192 {
193  _local_bh_enable_ip(ip);
194 }
195 EXPORT_SYMBOL(local_bh_enable_ip);
196 
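The usual pattern built on these primitives, sketched here with a hypothetical per-CPU queue: process context disables bottom halves around data it shares with a softirq handler on the same CPU, and local_bh_enable() then runs any softirq raised in the meantime:

static DEFINE_PER_CPU(struct list_head, example_queue);	/* also drained by a softirq handler */

static void example_queue_add(struct list_head *item)
{
	local_bh_disable();	/* also disables preemption via preempt_count */
	list_add_tail(item, &__get_cpu_var(example_queue));
	local_bh_enable();	/* may end up calling do_softirq() right here */
}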
197 /*
198  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
199  * and we fall back to softirqd after that.
200  *
201  * This number has been established via experimentation.
202  * The two things to balance are latency and fairness -
203  * we want to handle softirqs as soon as possible, but they
204  * should not be able to lock up the box.
205  */
206 #define MAX_SOFTIRQ_RESTART 10
207 
208 asmlinkage void __do_softirq(void)
209 {
210  struct softirq_action *h;
211  __u32 pending;
212  int max_restart = MAX_SOFTIRQ_RESTART;
213  int cpu;
214  unsigned long old_flags = current->flags;
215 
216  /*
217  * Mask out PF_MEMALLOC, as the current task context is borrowed for the
218  * softirq. A softirq handler such as network RX might set PF_MEMALLOC
219  * again if the socket is related to swap.
220  */
221  current->flags &= ~PF_MEMALLOC;
222 
223  pending = local_softirq_pending();
224  account_system_vtime(current);
225 
226  __local_bh_disable((unsigned long)__builtin_return_address(0),
227  SOFTIRQ_OFFSET);
228  lockdep_softirq_enter();
229 
230  cpu = smp_processor_id();
231 restart:
232  /* Reset the pending bitmask before enabling irqs */
233  set_softirq_pending(0);
234 
235  local_irq_enable();
236 
237  h = softirq_vec;
238 
239  do {
240  if (pending & 1) {
241  unsigned int vec_nr = h - softirq_vec;
242  int prev_count = preempt_count();
243 
244  kstat_incr_softirqs_this_cpu(vec_nr);
245 
246  trace_softirq_entry(vec_nr);
247  h->action(h);
248  trace_softirq_exit(vec_nr);
249  if (unlikely(prev_count != preempt_count())) {
250  printk(KERN_ERR "huh, entered softirq %u %s %p "
251  "with preempt_count %08x,"
252  " exited with %08x?\n", vec_nr,
253  softirq_to_name[vec_nr], h->action,
254  prev_count, preempt_count());
255  preempt_count() = prev_count;
256  }
257 
258  rcu_bh_qs(cpu);
259  }
260  h++;
261  pending >>= 1;
262  } while (pending);
263 
264  local_irq_disable();
265 
266  pending = local_softirq_pending();
267  if (pending && --max_restart)
268  goto restart;
269 
270  if (pending)
271  wakeup_softirqd();
272 
273  lockdep_softirq_exit();
274 
275  account_system_vtime(current);
276  __local_bh_enable(SOFTIRQ_OFFSET);
277  tsk_restore_flags(current, old_flags, PF_MEMALLOC);
278 }
279 
280 #ifndef __ARCH_HAS_DO_SOFTIRQ
281 
282 asmlinkage void do_softirq(void)
283 {
284  __u32 pending;
285  unsigned long flags;
286 
287  if (in_interrupt())
288  return;
289 
290  local_irq_save(flags);
291 
292  pending = local_softirq_pending();
293 
294  if (pending)
295  __do_softirq();
296 
297  local_irq_restore(flags);
298 }
299 
300 #endif
301 
302 /*
303  * Enter an interrupt context.
304  */
305 void irq_enter(void)
306 {
307  int cpu = smp_processor_id();
308 
309  rcu_irq_enter();
310  if (is_idle_task(current) && !in_interrupt()) {
311  /*
312  * Prevent raise_softirq from needlessly waking up ksoftirqd
313  * here, as softirq will be serviced on return from interrupt.
314  */
315  local_bh_disable();
316  tick_check_idle(cpu);
317  _local_bh_enable();
318  }
319 
320  __irq_enter();
321 }
322 
323 static inline void invoke_softirq(void)
324 {
325  if (!force_irqthreads) {
326 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
327  __do_softirq();
328 #else
329  do_softirq();
330 #endif
331  } else {
332  __local_bh_disable((unsigned long)__builtin_return_address(0),
333  SOFTIRQ_OFFSET);
334  wakeup_softirqd();
335  __local_bh_enable(SOFTIRQ_OFFSET);
336  }
337 }
338 
339 /*
340  * Exit an interrupt context. Process softirqs if needed and possible:
341  */
342 void irq_exit(void)
343 {
344  account_system_vtime(current);
345  trace_hardirq_exit();
346  sub_preempt_count(IRQ_EXIT_OFFSET);
347  if (!in_interrupt() && local_softirq_pending())
348  invoke_softirq();
349 
350 #ifdef CONFIG_NO_HZ
351  /* Make sure that timer wheel updates are propagated */
352  if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
353  tick_nohz_irq_exit();
354 #endif
355  rcu_irq_exit();
356  sched_preempt_enable_no_resched();
357 }
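For reference, architecture interrupt entry code typically brackets its dispatch with the two functions above, roughly as in this sketch (modelled on the generic handle_IRQ() helpers, not code from this file):

void example_handle_IRQ(unsigned int irq, struct pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs(regs);

	irq_enter();			/* enter hardirq context, inform RCU/nohz */
	generic_handle_irq(irq);	/* run the hardirq handler(s) for this irq */
	irq_exit();			/* may run pending softirqs on the way out */

	set_irq_regs(old_regs);
}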
358 
359 /*
360  * This function must run with irqs disabled!
361  */
362 inline void raise_softirq_irqoff(unsigned int nr)
363 {
364  __raise_softirq_irqoff(nr);
365 
366  /*
367  * If we're in an interrupt or softirq, we're done
368  * (this also catches softirq-disabled code). We will
369  * actually run the softirq once we return from
370  * the irq or softirq.
371  *
372  * Otherwise we wake up ksoftirqd to make sure we
373  * schedule the softirq soon.
374  */
375  if (!in_interrupt())
376  wakeup_softirqd();
377 }
378 
379 void raise_softirq(unsigned int nr)
380 {
381  unsigned long flags;
382 
383  local_irq_save(flags);
384  raise_softirq_irqoff(nr);
385  local_irq_restore(flags);
386 }
387 
388 void __raise_softirq_irqoff(unsigned int nr)
389 {
390  trace_softirq_raise(nr);
391  or_softirq_pending(1UL << nr);
392 }
393 
394 void open_softirq(int nr, void (*action)(struct softirq_action *))
395 {
396  softirq_vec[nr].action = action;
397 }
398 
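A sketch of how a subsystem typically wires a vector up: register its handler once at init with open_softirq() and raise the vector later, usually from a hardirq handler. The function names are hypothetical and the vector choice is purely illustrative; real vectors are fixed entries of the NR_SOFTIRQS enum, each owned by its subsystem:

static void example_done_action(struct softirq_action *h)
{
	/* drain this CPU's completion list ... */
}

static __init int example_init(void)
{
	open_softirq(BLOCK_SOFTIRQ, example_done_action);
	return 0;
}

static irqreturn_t example_irq_handler(int irq, void *dev_id)
{
	/* queue some CPU-local work, then: */
	raise_softirq(BLOCK_SOFTIRQ);	/* safe with hardirqs on or off */
	return IRQ_HANDLED;
}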
399 /*
400  * Tasklets
401  */
402 struct tasklet_head
403 {
404  struct tasklet_struct *head;
405  struct tasklet_struct **tail;
406 };
407 
408 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
409 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
410 
411 void __tasklet_schedule(struct tasklet_struct *t)
412 {
413  unsigned long flags;
414 
415  local_irq_save(flags);
416  t->next = NULL;
417  *__this_cpu_read(tasklet_vec.tail) = t;
418  __this_cpu_write(tasklet_vec.tail, &(t->next));
419  raise_softirq_irqoff(TASKLET_SOFTIRQ);
420  local_irq_restore(flags);
421 }
422 
423 EXPORT_SYMBOL(__tasklet_schedule);
424 
425 void __tasklet_hi_schedule(struct tasklet_struct *t)
426 {
427  unsigned long flags;
428 
429  local_irq_save(flags);
430  t->next = NULL;
431  *__this_cpu_read(tasklet_hi_vec.tail) = t;
432  __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
433  raise_softirq_irqoff(HI_SOFTIRQ);
434  local_irq_restore(flags);
435 }
436 
437 EXPORT_SYMBOL(__tasklet_hi_schedule);
438 
439 void __tasklet_hi_schedule_first(struct tasklet_struct *t)
440 {
441  BUG_ON(!irqs_disabled());
442 
443  t->next = __this_cpu_read(tasklet_hi_vec.head);
444  __this_cpu_write(tasklet_hi_vec.head, t);
445  __raise_softirq_irqoff(HI_SOFTIRQ);
446 }
447 
448 EXPORT_SYMBOL(__tasklet_hi_schedule_first);
449 
450 static void tasklet_action(struct softirq_action *a)
451 {
452  struct tasklet_struct *list;
453 
454  local_irq_disable();
455  list = __this_cpu_read(tasklet_vec.head);
456  __this_cpu_write(tasklet_vec.head, NULL);
457  __this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
458  local_irq_enable();
459 
460  while (list) {
461  struct tasklet_struct *t = list;
462 
463  list = list->next;
464 
465  if (tasklet_trylock(t)) {
466  if (!atomic_read(&t->count)) {
467  if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
468  BUG();
469  t->func(t->data);
470  tasklet_unlock(t);
471  continue;
472  }
473  tasklet_unlock(t);
474  }
475 
476  local_irq_disable();
477  t->next = NULL;
478  *__this_cpu_read(tasklet_vec.tail) = t;
479  __this_cpu_write(tasklet_vec.tail, &(t->next));
480  __raise_softirq_irqoff(TASKLET_SOFTIRQ);
481  local_irq_enable();
482  }
483 }
484 
485 static void tasklet_hi_action(struct softirq_action *a)
486 {
487  struct tasklet_struct *list;
488 
489  local_irq_disable();
490  list = __this_cpu_read(tasklet_hi_vec.head);
491  __this_cpu_write(tasklet_hi_vec.head, NULL);
492  __this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
493  local_irq_enable();
494 
495  while (list) {
496  struct tasklet_struct *t = list;
497 
498  list = list->next;
499 
500  if (tasklet_trylock(t)) {
501  if (!atomic_read(&t->count)) {
502  if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
503  BUG();
504  t->func(t->data);
505  tasklet_unlock(t);
506  continue;
507  }
508  tasklet_unlock(t);
509  }
510 
511  local_irq_disable();
512  t->next = NULL;
513  *__this_cpu_read(tasklet_hi_vec.tail) = t;
514  __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
515  __raise_softirq_irqoff(HI_SOFTIRQ);
516  local_irq_enable();
517  }
518 }
519 
520 
521 void tasklet_init(struct tasklet_struct *t,
522  void (*func)(unsigned long), unsigned long data)
523 {
524  t->next = NULL;
525  t->state = 0;
526  atomic_set(&t->count, 0);
527  t->func = func;
528  t->data = data;
529 }
530 
531 EXPORT_SYMBOL(tasklet_init);
532 
533 void tasklet_kill(struct tasklet_struct *t)
534 {
535  if (in_interrupt())
536  printk("Attempt to kill tasklet from interrupt\n");
537 
538  while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
539  do {
540  yield();
541  } while (test_bit(TASKLET_STATE_SCHED, &t->state));
542  }
543  tasklet_unlock_wait(t);
544  clear_bit(TASKLET_STATE_SCHED, &t->state);
545 }
546 
547 EXPORT_SYMBOL(tasklet_kill);
548 
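A sketch of the usual driver-side tasklet lifecycle built on the API above; the device structure and all function names are hypothetical:

struct example_dev {
	struct tasklet_struct tasklet;
	unsigned long events;
};

static void example_tasklet_fn(unsigned long data)
{
	struct example_dev *dev = (struct example_dev *)data;

	/* Runs in softirq context, serialized wrt itself. */
	dev->events = 0;
}

static irqreturn_t example_irq(int irq, void *dev_id)
{
	struct example_dev *dev = dev_id;

	dev->events++;
	tasklet_schedule(&dev->tasklet);	/* defer the heavy lifting */
	return IRQ_HANDLED;
}

static void example_probe(struct example_dev *dev)
{
	tasklet_init(&dev->tasklet, example_tasklet_fn, (unsigned long)dev);
}

static void example_remove(struct example_dev *dev)
{
	tasklet_kill(&dev->tasklet);	/* wait out a scheduled/running instance */
}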
549 /*
550  * tasklet_hrtimer
551  */
552 
553 /*
554  * The trampoline is called when the hrtimer expires. It schedules a tasklet
555  * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
556  * hrtimer callback, but from softirq context.
557  */
558 static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
559 {
560  struct tasklet_hrtimer *ttimer =
561  container_of(timer, struct tasklet_hrtimer, timer);
562 
563  tasklet_hi_schedule(&ttimer->tasklet);
564  return HRTIMER_NORESTART;
565 }
566 
567 /*
568  * Helper function which calls the hrtimer callback from
569  * tasklet/softirq context
570  */
571 static void __tasklet_hrtimer_trampoline(unsigned long data)
572 {
573  struct tasklet_hrtimer *ttimer = (void *)data;
574  enum hrtimer_restart restart;
575 
576  restart = ttimer->function(&ttimer->timer);
577  if (restart != HRTIMER_NORESTART)
578  hrtimer_restart(&ttimer->timer);
579 }
580 
581 /**
582  * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
583  * @ttimer: tasklet_hrtimer which is initialized
584  * @function: hrtimer callback function which gets called from softirq context
585  * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
586  * @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
587  */
588 void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
589  enum hrtimer_restart (*function)(struct hrtimer *),
590  clockid_t which_clock, enum hrtimer_mode mode)
591 {
592  hrtimer_init(&ttimer->timer, which_clock, mode);
593  ttimer->timer.function = __hrtimer_tasklet_trampoline;
594  tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
595  (unsigned long)ttimer);
596  ttimer->function = function;
597 }
598 EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
599 
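A sketch of the intended use, assuming the tasklet_hrtimer_start() inline from <linux/interrupt.h>; the timer, callback and period below are hypothetical:

static struct tasklet_hrtimer example_thrt;

static enum hrtimer_restart example_timer_fn(struct hrtimer *timer)
{
	/* Invoked from HI_SOFTIRQ context via the trampolines above. */
	return HRTIMER_NORESTART;
}

static void example_arm(void)
{
	tasklet_hrtimer_init(&example_thrt, example_timer_fn,
			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	tasklet_hrtimer_start(&example_thrt,
			      ktime_set(0, 10 * NSEC_PER_MSEC),
			      HRTIMER_MODE_REL);
}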
600 /*
601  * Remote softirq bits
602  */
603 
604 DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
605 EXPORT_PER_CPU_SYMBOL(softirq_work_list);
606 
607 static void __local_trigger(struct call_single_data *cp, int softirq)
608 {
609  struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
610 
611  list_add_tail(&cp->list, head);
612 
613  /* Trigger the softirq only if the list was previously empty. */
614  if (head->next == &cp->list)
615  raise_softirq_irqoff(softirq);
616 }
617 
618 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
619 static void remote_softirq_receive(void *data)
620 {
621  struct call_single_data *cp = data;
622  unsigned long flags;
623  int softirq;
624 
625  softirq = cp->priv;
626 
627  local_irq_save(flags);
628  __local_trigger(cp, softirq);
629  local_irq_restore(flags);
630 }
631 
632 static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
633 {
634  if (cpu_online(cpu)) {
635  cp->func = remote_softirq_receive;
636  cp->info = cp;
637  cp->flags = 0;
638  cp->priv = softirq;
639 
640  __smp_call_function_single(cpu, cp, 0);
641  return 0;
642  }
643  return 1;
644 }
645 #else /* CONFIG_USE_GENERIC_SMP_HELPERS */
646 static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
647 {
648  return 1;
649 }
650 #endif
651 
652 /**
653  * __send_remote_softirq - try to schedule softirq work on a remote cpu
654  * @cp: private SMP call function data area
655  * @cpu: the remote cpu
656  * @this_cpu: the currently executing cpu
657  * @softirq: the softirq for the work
658  *
659  * Attempt to schedule softirq work on a remote cpu.  If this cannot be
660  * done, the work is instead queued up on the local cpu.
661  *
662  * Interrupts must be disabled.
663  */
664 void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
665 {
666  if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
667  __local_trigger(cp, softirq);
668 }
669 EXPORT_SYMBOL(__send_remote_softirq);
670 
671 /**
672  * send_remote_softirq - try to schedule softirq work on a remote cpu
673  * @cp: private SMP call function data area
674  * @cpu: the remote cpu
675  * @softirq: the softirq for the work
676  *
677  * Like __send_remote_softirq except that disabling interrupts and
678  * computing the current cpu is done for the caller.
679  */
680 void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
681 {
682  unsigned long flags;
683  int this_cpu;
684 
685  local_irq_save(flags);
686  this_cpu = smp_processor_id();
687  __send_remote_softirq(cp, cpu, this_cpu, softirq);
688  local_irq_restore(flags);
689 }
690 EXPORT_SYMBOL(send_remote_softirq);
691 
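A sketch of how this remote-softirq interface is meant to be used: embed a call_single_data in the work item and pass it to send_remote_softirq(); the handler of the chosen vector on the target CPU is then expected to drain its __get_cpu_var(softirq_work_list[nr]) list. The structure, function and vector choice here are illustrative only:

struct example_done {
	struct call_single_data csd;
	int error;
};

static void example_complete_on(struct example_done *d, int dest_cpu)
{
	/*
	 * Queues d->csd on dest_cpu's softirq_work_list[BLOCK_SOFTIRQ] and
	 * raises the softirq there; falls back to the local CPU when
	 * dest_cpu is offline or the generic SMP helpers are unavailable.
	 */
	send_remote_softirq(&d->csd, dest_cpu, BLOCK_SOFTIRQ);
}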
692 static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
693  unsigned long action, void *hcpu)
694 {
695  /*
696  * If a CPU goes away, splice its entries to the current CPU
697  * and trigger a run of the softirq
698  */
699  if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
700  int cpu = (unsigned long) hcpu;
701  int i;
702 
703  local_irq_disable();
704  for (i = 0; i < NR_SOFTIRQS; i++) {
705  struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
706  struct list_head *local_head;
707 
708  if (list_empty(head))
709  continue;
710 
711  local_head = &__get_cpu_var(softirq_work_list[i]);
712  list_splice_init(head, local_head);
713  raise_softirq_irqoff(i);
714  }
715  local_irq_enable();
716  }
717 
718  return NOTIFY_OK;
719 }
720 
721 static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
722  .notifier_call = remote_softirq_cpu_notify,
723 };
724 
725 void __init softirq_init(void)
726 {
727  int cpu;
728 
729  for_each_possible_cpu(cpu) {
730  int i;
731 
732  per_cpu(tasklet_vec, cpu).tail =
733  &per_cpu(tasklet_vec, cpu).head;
734  per_cpu(tasklet_hi_vec, cpu).tail =
735  &per_cpu(tasklet_hi_vec, cpu).head;
736  for (i = 0; i < NR_SOFTIRQS; i++)
737  INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
738  }
739 
740  register_hotcpu_notifier(&remote_softirq_cpu_notifier);
741 
742  open_softirq(TASKLET_SOFTIRQ, tasklet_action);
743  open_softirq(HI_SOFTIRQ, tasklet_hi_action);
744 }
745 
746 static int ksoftirqd_should_run(unsigned int cpu)
747 {
748  return local_softirq_pending();
749 }
750 
751 static void run_ksoftirqd(unsigned int cpu)
752 {
753  local_irq_disable();
754  if (local_softirq_pending()) {
755  __do_softirq();
756  rcu_note_context_switch(cpu);
757  local_irq_enable();
758  cond_resched();
759  return;
760  }
761  local_irq_enable();
762 }
763 
764 #ifdef CONFIG_HOTPLUG_CPU
765 /*
766  * tasklet_kill_immediate is called to remove a tasklet which can already be
767  * scheduled for execution on @cpu.
768  *
769  * Unlike tasklet_kill, this function removes the tasklet
770  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
771  *
772  * When this function is called, @cpu must be in the CPU_DEAD state.
773  */
774 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
775 {
776  struct tasklet_struct **i;
777 
778  BUG_ON(cpu_online(cpu));
779  BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
780 
781  if (!test_bit(TASKLET_STATE_SCHED, &t->state))
782  return;
783 
784  /* CPU is dead, so no lock needed. */
785  for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
786  if (*i == t) {
787  *i = t->next;
788  /* If this was the tail element, move the tail ptr */
789  if (*i == NULL)
790  per_cpu(tasklet_vec, cpu).tail = i;
791  return;
792  }
793  }
794  BUG();
795 }
796 
797 static void takeover_tasklets(unsigned int cpu)
798 {
799  /* CPU is dead, so no lock needed. */
800  local_irq_disable();
801 
802  /* Find end, append list for that CPU. */
803  if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
804  *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
805  this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
806  per_cpu(tasklet_vec, cpu).head = NULL;
807  per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
808  }
809  raise_softirq_irqoff(TASKLET_SOFTIRQ);
810 
811  if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
812  *__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
813  __this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
814  per_cpu(tasklet_hi_vec, cpu).head = NULL;
815  per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
816  }
817  raise_softirq_irqoff(HI_SOFTIRQ);
818 
819  local_irq_enable();
820 }
821 #endif /* CONFIG_HOTPLUG_CPU */
822 
823 static int __cpuinit cpu_callback(struct notifier_block *nfb,
824  unsigned long action,
825  void *hcpu)
826 {
827  switch (action) {
828 #ifdef CONFIG_HOTPLUG_CPU
829  case CPU_DEAD:
830  case CPU_DEAD_FROZEN:
831  takeover_tasklets((unsigned long)hcpu);
832  break;
833 #endif /* CONFIG_HOTPLUG_CPU */
834  }
835  return NOTIFY_OK;
836 }
837 
838 static struct notifier_block __cpuinitdata cpu_nfb = {
839  .notifier_call = cpu_callback
840 };
841 
842 static struct smp_hotplug_thread softirq_threads = {
843  .store = &ksoftirqd,
844  .thread_should_run = ksoftirqd_should_run,
845  .thread_fn = run_ksoftirqd,
846  .thread_comm = "ksoftirqd/%u",
847 };
848 
849 static __init int spawn_ksoftirqd(void)
850 {
851  register_cpu_notifier(&cpu_nfb);
852 
853  BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
854 
855  return 0;
856 }
857 early_initcall(spawn_ksoftirqd);
858 
859 /*
860  * [ These __weak aliases are kept in a separate compilation unit, so that
861  * GCC does not inline them incorrectly. ]
862  */
863 
864 int __init __weak early_irq_init(void)
865 {
866  return 0;
867 }
868 
869 #ifdef CONFIG_GENERIC_HARDIRQS
870 int __init __weak arch_probe_nr_irqs(void)
871 {
872  return NR_IRQS_LEGACY;
873 }
874 
875 int __init __weak arch_early_irq_init(void)
876 {
877  return 0;
878 }
879 #endif