Linux Kernel 3.7.1
smp.c
1 /*
2  * Generic helpers for smp ipi calls
3  *
4  * (C) Jens Axboe <[email protected]> 2008
5  */
6 #include <linux/rcupdate.h>
7 #include <linux/rculist.h>
8 #include <linux/kernel.h>
9 #include <linux/export.h>
10 #include <linux/percpu.h>
11 #include <linux/init.h>
12 #include <linux/gfp.h>
13 #include <linux/smp.h>
14 #include <linux/cpu.h>
15 
16 #include "smpboot.h"
17 
18 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
19 static struct {
20  struct list_head queue;
21  raw_spinlock_t lock;
22 } call_function __cacheline_aligned_in_smp =
23  {
24  .queue = LIST_HEAD_INIT(call_function.queue),
25  .lock = __RAW_SPIN_LOCK_UNLOCKED(call_function.lock),
26  };
27 
28 enum {
29  CSD_FLAG_LOCK = 0x01,
30 };
31 
32 struct call_function_data {
33  struct call_single_data csd;
34  atomic_t refs;
35  cpumask_var_t cpumask;
36 };
37 
38 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
39 
40 struct call_single_queue {
41  struct list_head list;
42  raw_spinlock_t lock;
43 };
44 
45 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_queue, call_single_queue);
46 
47 static int
48 hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
49 {
50  long cpu = (long)hcpu;
51  struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
52 
53  switch (action) {
54  case CPU_UP_PREPARE:
55  case CPU_UP_PREPARE_FROZEN:
56  if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
57  cpu_to_node(cpu)))
58  return notifier_from_errno(-ENOMEM);
59  break;
60 
61 #ifdef CONFIG_HOTPLUG_CPU
62  case CPU_UP_CANCELED:
63  case CPU_UP_CANCELED_FROZEN:
64 
65  case CPU_DEAD:
66  case CPU_DEAD_FROZEN:
67  free_cpumask_var(cfd->cpumask);
68  break;
69 #endif
70  };
71 
72  return NOTIFY_OK;
73 }
74 
75 static struct notifier_block __cpuinitdata hotplug_cfd_notifier = {
76  .notifier_call = hotplug_cfd,
77 };
78 
79 void __init call_function_init(void)
80 {
81  void *cpu = (void *)(long)smp_processor_id();
82  int i;
83 
84  for_each_possible_cpu(i) {
85  struct call_single_queue *q = &per_cpu(call_single_queue, i);
86 
87  raw_spin_lock_init(&q->lock);
88  INIT_LIST_HEAD(&q->list);
89  }
90 
91  hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
92  register_cpu_notifier(&hotplug_cfd_notifier);
93 }
94 
95 /*
96  * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
97  *
98  * For non-synchronous ipi calls the csd can still be in use by the
99  * previous function call. For multi-cpu calls it's even more interesting
100  * as we'll have to ensure no other cpu is observing our csd.
101  */
102 static void csd_lock_wait(struct call_single_data *data)
103 {
104  while (data->flags & CSD_FLAG_LOCK)
105  cpu_relax();
106 }
107 
108 static void csd_lock(struct call_single_data *data)
109 {
110  csd_lock_wait(data);
111  data->flags = CSD_FLAG_LOCK;
112 
113  /*
114  * prevent CPU from reordering the above assignment
115  * to ->flags with any subsequent assignments to other
116  * fields of the specified call_single_data structure:
117  */
118  smp_mb();
119 }
120 
121 static void csd_unlock(struct call_single_data *data)
122 {
123  WARN_ON(!(data->flags & CSD_FLAG_LOCK));
124 
125  /*
126  * ensure we're all done before releasing data:
127  */
128  smp_mb();
129 
130  data->flags &= ~CSD_FLAG_LOCK;
131 }
132 
133 /*
134  * Insert a previously allocated call_single_data element
135  * for execution on the given CPU. data must already have
136  * ->func, ->info, and ->flags set.
137  */
138 static
139 void generic_exec_single(int cpu, struct call_single_data *data, int wait)
140 {
141  struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
142  unsigned long flags;
143  int ipi;
144 
145  raw_spin_lock_irqsave(&dst->lock, flags);
146  ipi = list_empty(&dst->list);
147  list_add_tail(&data->list, &dst->list);
148  raw_spin_unlock_irqrestore(&dst->lock, flags);
149 
150  /*
151  * The list addition should be visible before sending the IPI
152  * handler locks the list to pull the entry off it because of
153  * normal cache coherency rules implied by spinlocks.
154  *
155  * If IPIs can go out of order to the cache coherency protocol
156  * in an architecture, sufficient synchronisation should be added
157  * to arch code to make it appear to obey cache coherency WRT
158  * locking and barrier primitives. Generic code isn't really
159  * equipped to do the right thing...
160  */
161  if (ipi)
162  arch_send_call_function_single_ipi(cpu);
163 
164  if (wait)
165  csd_lock_wait(data);
166 }
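To make the csd_lock()/generic_exec_single() protocol above concrete, here is a minimal sketch of a synchronous sender, modelled on what smp_call_function_single() does later in this file. The wrapper itself is hypothetical and not part of smp.c.

/* Illustrative sketch, not part of smp.c: a synchronous single-CPU call. */
static void example_exec_on(int cpu, smp_call_func_t func, void *info)
{
	/* on-stack csd is safe only because we pass wait=1 below */
	struct call_single_data d = { .flags = 0 };

	/* caller is expected to hold preemption off, as get_cpu() does */
	csd_lock(&d);			/* claim the csd, then smp_mb() */
	d.func = func;
	d.info = info;
	generic_exec_single(cpu, &d, 1);	/* queue, IPI, csd_lock_wait() */
}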
167 
168 /*
169  * Invoked by arch to handle an IPI for call function. Must be called with
170  * interrupts disabled.
171  */
172 void generic_smp_call_function_interrupt(void)
173 {
174  struct call_function_data *data;
175  int cpu = smp_processor_id();
176 
177  /*
178  * Shouldn't receive this interrupt on a cpu that is not yet online.
179  */
180  WARN_ON_ONCE(!cpu_online(cpu));
181 
182  /*
183  * Ensure entry is visible on call_function_queue after we have
184  * entered the IPI. See comment in smp_call_function_many.
185  * If we don't have this, then we may miss an entry on the list
186  * and never get another IPI to process it.
187  */
188  smp_mb();
189 
190  /*
191  * It's ok to use list_for_each_rcu() here even though we may
192  * delete 'pos', since list_del_rcu() doesn't clear ->next
193  */
194  list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
195  int refs;
196  smp_call_func_t func;
197 
198  /*
199  * Since we walk the list without any locks, we might
200  * see an entry that was completed, removed from the
201  * list and is in the process of being reused.
202  *
203  * We must check that the cpu is in the cpumask before
204  * checking the refs, and both must be set before
205  * executing the callback on this cpu.
206  */
207 
208  if (!cpumask_test_cpu(cpu, data->cpumask))
209  continue;
210 
211  smp_rmb();
212 
213  if (atomic_read(&data->refs) == 0)
214  continue;
215 
216  func = data->csd.func; /* save for later warn */
217  func(data->csd.info);
218 
219  /*
220  * If the cpu mask is not still set then func enabled
221  * interrupts (BUG), and this cpu took another smp call
222  * function interrupt and executed func(info) twice
223  * on this cpu. That nested execution decremented refs.
224  */
225  if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) {
226  WARN(1, "%pf enabled interrupts and double executed\n", func);
227  continue;
228  }
229 
230  refs = atomic_dec_return(&data->refs);
231  WARN_ON(refs < 0);
232 
233  if (refs)
234  continue;
235 
236  WARN_ON(!cpumask_empty(data->cpumask));
237 
238  raw_spin_lock(&call_function.lock);
239  list_del_rcu(&data->csd.list);
240  raw_spin_unlock(&call_function.lock);
241 
242  csd_unlock(&data->csd);
243  }
244 
245 }
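For context, an architecture's IPI vector usually does little more than enter IRQ context and call the generic handler above. The sketch below is hedged: the entry-point name, interrupt acknowledgement and IRQ accounting are architecture specific and not taken from this file.

/* Illustrative sketch, not part of smp.c: a hypothetical arch IPI handler. */
void handle_call_function_ipi(void)
{
	/* entered with interrupts disabled, as the comment above requires */
	irq_enter();
	generic_smp_call_function_interrupt();
	irq_exit();
}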
246 
247 /*
248  * Invoked by arch to handle an IPI for call function single. Must be
249  * called from the arch with interrupts disabled.
250  */
251 void generic_smp_call_function_single_interrupt(void)
252 {
253  struct call_single_queue *q = &__get_cpu_var(call_single_queue);
254  unsigned int data_flags;
255  LIST_HEAD(list);
256 
257  /*
258  * Shouldn't receive this interrupt on a cpu that is not yet online.
259  */
260  WARN_ON_ONCE(!cpu_online(smp_processor_id()));
261 
262  raw_spin_lock(&q->lock);
263  list_replace_init(&q->list, &list);
264  raw_spin_unlock(&q->lock);
265 
266  while (!list_empty(&list)) {
267  struct call_single_data *data;
268 
269  data = list_entry(list.next, struct call_single_data, list);
270  list_del(&data->list);
271 
272  /*
273  * 'data' can be invalid after this call if flags == 0
274  * (when called through generic_exec_single()),
275  * so save them away before making the call:
276  */
277  data_flags = data->flags;
278 
279  data->func(data->info);
280 
281  /*
282  * Unlocked CSDs are valid through generic_exec_single():
283  */
284  if (data_flags & CSD_FLAG_LOCK)
285  csd_unlock(data);
286  }
287 }
288 
289 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
290 
291 /*
292  * smp_call_function_single - Run a function on a specific CPU
293  * @func: The function to run. This must be fast and non-blocking.
294  * @info: An arbitrary pointer to pass to the function.
295  * @wait: If true, wait until function has completed on other CPUs.
296  *
297  * Returns 0 on success, else a negative status code.
298  */
299 int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
300  int wait)
301 {
302  struct call_single_data d = {
303  .flags = 0,
304  };
305  unsigned long flags;
306  int this_cpu;
307  int err = 0;
308 
309  /*
310  * prevent preemption and reschedule on another processor,
311  * as well as CPU removal
312  */
313  this_cpu = get_cpu();
314 
315  /*
316  * Can deadlock when called with interrupts disabled.
317  * We allow cpu's that are not yet online though, as no one else can
318  * send smp call function interrupt to this cpu and as such deadlocks
319  * can't happen.
320  */
321  WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
322  && !oops_in_progress);
323 
324  if (cpu == this_cpu) {
325  local_irq_save(flags);
326  func(info);
327  local_irq_restore(flags);
328  } else {
329  if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
330  struct call_single_data *data = &d;
331 
332  if (!wait)
333  data = &__get_cpu_var(csd_data);
334 
335  csd_lock(data);
336 
337  data->func = func;
338  data->info = info;
339  generic_exec_single(cpu, data, wait);
340  } else {
341  err = -ENXIO; /* CPU not online */
342  }
343  }
344 
345  put_cpu();
346 
347  return err;
348 }
349 EXPORT_SYMBOL(smp_call_function_single);
350 
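As a usage illustration (not part of smp.c), a caller might read a CPU-local value synchronously. read_local_counter() and both wrappers are hypothetical; the callback runs in interrupt context on the target CPU and therefore must not sleep.

unsigned long read_local_counter(void);	/* hypothetical */

static void read_counter_func(void *info)
{
	*(unsigned long *)info = read_local_counter();
}

static unsigned long read_counter_on(int cpu)
{
	unsigned long val = 0;

	/* wait=1 keeps the on-stack 'val' valid until the callback is done */
	smp_call_function_single(cpu, read_counter_func, &val, 1);
	return val;
}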
351 /*
352  * smp_call_function_any - Run a function on any of the given cpus
353  * @mask: The mask of cpus it can run on.
354  * @func: The function to run. This must be fast and non-blocking.
355  * @info: An arbitrary pointer to pass to the function.
356  * @wait: If true, wait until function has completed.
357  *
358  * Returns 0 on success, else a negative status code (if no cpus were online).
359  * Note that @wait will be implicitly turned on in case of allocation failures,
360  * since we fall back to on-stack allocation.
361  *
362  * Selection preference:
363  * 1) current cpu if in @mask
364  * 2) any cpu of current node if in @mask
365  * 3) any other online cpu in @mask
366  */
367 int smp_call_function_any(const struct cpumask *mask,
368  smp_call_func_t func, void *info, int wait)
369 {
370  unsigned int cpu;
371  const struct cpumask *nodemask;
372  int ret;
373 
374  /* Try for same CPU (cheapest) */
375  cpu = get_cpu();
376  if (cpumask_test_cpu(cpu, mask))
377  goto call;
378 
379  /* Try for same node. */
380  nodemask = cpumask_of_node(cpu_to_node(cpu));
381  for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
382  cpu = cpumask_next_and(cpu, nodemask, mask)) {
383  if (cpu_online(cpu))
384  goto call;
385  }
386 
387  /* Any online will do: smp_call_function_single handles nr_cpu_ids. */
388  cpu = cpumask_any_and(mask, cpu_online_mask);
389 call:
390  ret = smp_call_function_single(cpu, func, info, wait);
391  put_cpu();
392  return ret;
393 }
394 EXPORT_SYMBOL_GPL(smp_call_function_any);
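A hedged usage sketch: run a callback on whichever suitable CPU is cheapest to reach, following the selection preference documented above. flush_dev_cache(), dev_mask and the wrapper are hypothetical.

static void flush_dev_cache(void *dev);	/* hypothetical callback */

static int flush_on_nearby_cpu(const struct cpumask *dev_mask, void *dev)
{
	/* prefers this CPU, then its node, then any online CPU in dev_mask;
	 * returns -ENXIO if no CPU in dev_mask is online */
	return smp_call_function_any(dev_mask, flush_dev_cache, dev, 1);
}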
395 
396 /**
397  * __smp_call_function_single(): Run a function on a specific CPU
398  * @cpu: The CPU to run on.
399  * @data: Pre-allocated and setup data structure
400  * @wait: If true, wait until function has completed on specified CPU.
401  *
402  * Like smp_call_function_single(), but allow caller to pass in a
403  * pre-allocated data structure. Useful for embedding @data inside
404  * other structures, for instance.
405  */
406 void __smp_call_function_single(int cpu, struct call_single_data *data,
407  int wait)
408 {
409  unsigned int this_cpu;
410  unsigned long flags;
411 
412  this_cpu = get_cpu();
413  /*
414  * Can deadlock when called with interrupts disabled.
415  * We allow cpu's that are not yet online though, as no one else can
416  * send smp call function interrupt to this cpu and as such deadlocks
417  * can't happen.
418  */
419  WARN_ON_ONCE(cpu_online(this_cpu) && wait && irqs_disabled()
420  && !oops_in_progress);
421 
422  if (cpu == this_cpu) {
423  local_irq_save(flags);
424  data->func(data->info);
425  local_irq_restore(flags);
426  } else {
427  csd_lock(data);
428  generic_exec_single(cpu, data, wait);
429  }
430  put_cpu();
431 }
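A hedged sketch of the "embed the csd in a larger object" pattern this helper exists for (compare how the block layer completes requests on the issuing CPU). struct my_request, my_request_done(), complete_request() and the wrapper are hypothetical.

struct my_request {
	struct call_single_data csd;
	/* ... request payload ... */
};

static void my_request_done(void *info)
{
	struct my_request *rq = info;

	complete_request(rq);		/* hypothetical: runs on the target CPU */
}

static void send_completion_to(struct my_request *rq, int cpu)
{
	rq->csd.func = my_request_done;
	rq->csd.info = rq;
	rq->csd.flags = 0;
	/* wait=0: rq must stay allocated until my_request_done() has run */
	__smp_call_function_single(cpu, &rq->csd, 0);
}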
432 
433 /**
434  * smp_call_function_many(): Run a function on a set of other CPUs.
435  * @mask: The set of cpus to run on (only runs on online subset).
436  * @func: The function to run. This must be fast and non-blocking.
437  * @info: An arbitrary pointer to pass to the function.
438  * @wait: If true, wait (atomically) until function has completed
439  *        on other CPUs.
440  *
441  * If @wait is true, then returns once @func has returned.
442  *
443  * You must not call this function with disabled interrupts or from a
444  * hardware interrupt handler or from a bottom half handler. Preemption
445  * must be disabled when calling this function.
446  */
447 void smp_call_function_many(const struct cpumask *mask,
448  smp_call_func_t func, void *info, bool wait)
449 {
450  struct call_function_data *data;
451  unsigned long flags;
452  int refs, cpu, next_cpu, this_cpu = smp_processor_id();
453 
454  /*
455  * Can deadlock when called with interrupts disabled.
456  * We allow cpu's that are not yet online though, as no one else can
457  * send smp call function interrupt to this cpu and as such deadlocks
458  * can't happen.
459  */
460  WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
461  && !oops_in_progress && !early_boot_irqs_disabled);
462 
463  /* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
464  cpu = cpumask_first_and(mask, cpu_online_mask);
465  if (cpu == this_cpu)
466  cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
467 
468  /* No online cpus? We're done. */
469  if (cpu >= nr_cpu_ids)
470  return;
471 
472  /* Do we have another CPU which isn't us? */
473  next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
474  if (next_cpu == this_cpu)
475  next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
476 
477  /* Fastpath: do that cpu by itself. */
478  if (next_cpu >= nr_cpu_ids) {
479  smp_call_function_single(cpu, func, info, wait);
480  return;
481  }
482 
483  data = &__get_cpu_var(cfd_data);
484  csd_lock(&data->csd);
485 
486  /* This BUG_ON verifies our reuse assertions and can be removed */
487  BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask));
488 
489  /*
490  * The global call function queue list add and delete are protected
491  * by a lock, but the list is traversed without any lock, relying
492  * on the rcu list add and delete to allow safe concurrent traversal.
493  * We reuse the call function data without waiting for any grace
494  * period after some other cpu removes it from the global queue.
495  * This means a cpu might find our data block as it is being
496  * filled out.
497  *
498  * We hold off the interrupt handler on the other cpu by
499  * ordering our writes to the cpu mask vs our setting of the
500  * refs counter. We assert only the cpu owning the data block
501  * will set a bit in cpumask, and each bit will only be cleared
502  * by the subject cpu. Each cpu must first find its bit is
503  * set and then check that refs is set indicating the element is
504  * ready to be processed, otherwise it must skip the entry.
505  *
506  * On the previous iteration refs was set to 0 by another cpu.
507  * To avoid the use of transitivity, set the counter to 0 here
508  * so the wmb will pair with the rmb in the interrupt handler.
509  */
510  atomic_set(&data->refs, 0); /* convert 3rd to 1st party write */
511 
512  data->csd.func = func;
513  data->csd.info = info;
514 
515  /* Ensure 0 refs is visible before mask. Also orders func and info */
516  smp_wmb();
517 
518  /* We rely on the "and" being processed before the store */
519  cpumask_and(data->cpumask, mask, cpu_online_mask);
520  cpumask_clear_cpu(this_cpu, data->cpumask);
521  refs = cpumask_weight(data->cpumask);
522 
523  /* Some callers race with other cpus changing the passed mask */
524  if (unlikely(!refs)) {
525  csd_unlock(&data->csd);
526  return;
527  }
528 
529  raw_spin_lock_irqsave(&call_function.lock, flags);
530  /*
531  * Place entry at the _HEAD_ of the list, so that any cpu still
532  * observing the entry in generic_smp_call_function_interrupt()
533  * will not miss any other list entries:
534  */
535  list_add_rcu(&data->csd.list, &call_function.queue);
536  /*
537  * We rely on the wmb() in list_add_rcu to complete our writes
538  * to the cpumask before this write to refs, which indicates
539  * data is on the list and is ready to be processed.
540  */
541  atomic_set(&data->refs, refs);
542  raw_spin_unlock_irqrestore(&call_function.lock, flags);
543 
544  /*
545  * Make the list addition visible before sending the ipi.
546  * (IPIs must obey or appear to obey normal Linux cache
547  * coherency rules -- see comment in generic_exec_single).
548  */
549  smp_mb();
550 
551  /* Send a message to all CPUs in the map */
552  arch_send_call_function_ipi_mask(data->cpumask);
553 
554  /* Optionally wait for the CPUs to complete */
555  if (wait)
556  csd_lock_wait(&data->csd);
557 }
558 EXPORT_SYMBOL(smp_call_function_many);
559 
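A hedged usage sketch: ask every other online CPU in a mask to drain a CPU-local cache. Preemption must be disabled across the call, and the calling CPU is never sent an IPI even if it is in the mask; drain_local_cache() is hypothetical.

static void drain_local_cache(void *info);	/* hypothetical callback */

static void drain_remote_caches(const struct cpumask *mask)
{
	preempt_disable();
	smp_call_function_many(mask, drain_local_cache, NULL, true);
	preempt_enable();
}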
575 int smp_call_function(smp_call_func_t func, void *info, int wait)
576 {
577  preempt_disable();
578  smp_call_function_many(cpu_online_mask, func, info, wait);
579  preempt_enable();
580 
581  return 0;
582 }
583 EXPORT_SYMBOL(smp_call_function);
584 #endif /* USE_GENERIC_SMP_HELPERS */
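A hedged usage sketch of smp_call_function(): it IPIs every other online CPU but never the caller, which is why on_each_cpu() later in this file exists for the case where the local CPU must run the callback too. do_sync_core() is hypothetical.

static void do_sync_core(void *info);	/* hypothetical callback */

static void sync_other_cores(void)
{
	/* wait=1: return only after every other online CPU has run the callback */
	smp_call_function(do_sync_core, NULL, 1);
}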
585 
586 /* Setup configured maximum number of CPUs to activate */
587 unsigned int setup_max_cpus = NR_CPUS;
588 EXPORT_SYMBOL(setup_max_cpus);
589 
590 
591 /*
592  * Setup routine for controlling SMP activation
593  *
594  * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
595  * activation entirely (the MPS table probe still happens, though).
596  *
597  * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
598  * greater than 0, limits the maximum number of CPUs activated in
599  * SMP mode to <NUM>.
600  */
601 
602 void __weak arch_disable_smp_support(void) { }
603 
604 static int __init nosmp(char *str)
605 {
606  setup_max_cpus = 0;
607  arch_disable_smp_support();
608 
609  return 0;
610 }
611 
612 early_param("nosmp", nosmp);
613 
614 /* this is hard limit */
615 static int __init nrcpus(char *str)
616 {
617  int nr_cpus;
618 
619  get_option(&str, &nr_cpus);
620  if (nr_cpus > 0 && nr_cpus < nr_cpu_ids)
621  nr_cpu_ids = nr_cpus;
622 
623  return 0;
624 }
625 
626 early_param("nr_cpus", nrcpus);
627 
628 static int __init maxcpus(char *str)
629 {
630  get_option(&str, &setup_max_cpus);
631  if (setup_max_cpus == 0)
632  arch_disable_smp_support();
633 
634  return 0;
635 }
636 
637 early_param("maxcpus", maxcpus);
638 
639 /* Setup number of possible processor ids */
640 int nr_cpu_ids __read_mostly = NR_CPUS;
641 EXPORT_SYMBOL(nr_cpu_ids);
642 
643 /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
644 void __init setup_nr_cpu_ids(void)
645 {
646  nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
647 }
648 
649 /* Called by boot processor to activate the rest. */
650 void __init smp_init(void)
651 {
652  unsigned int cpu;
653 
654  idle_threads_init();
655 
656  /* FIXME: This should be done in userspace --RR */
657  for_each_present_cpu(cpu) {
658  if (num_online_cpus() >= setup_max_cpus)
659  break;
660  if (!cpu_online(cpu))
661  cpu_up(cpu);
662  }
663 
664  /* Any cleanup work */
665  printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
666  smp_cpus_done(setup_max_cpus);
667 }
668 
669 /*
670  * Call a function on all processors. May be used during early boot while
671  * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
672  * of local_irq_disable/enable().
673  */
674 int on_each_cpu(void (*func) (void *info), void *info, int wait)
675 {
676  unsigned long flags;
677  int ret = 0;
678 
679  preempt_disable();
680  ret = smp_call_function(func, info, wait);
681  local_irq_save(flags);
682  func(info);
683  local_irq_restore(flags);
684  preempt_enable();
685  return ret;
686 }
687 EXPORT_SYMBOL(on_each_cpu);
688 
689 /**
690  * on_each_cpu_mask(): Run a function on processors specified by
691  * cpumask, which may include the local processor.
692  * @mask: The set of cpus to run on (only runs on online subset).
693  * @func: The function to run. This must be fast and non-blocking.
694  * @info: An arbitrary pointer to pass to the function.
695  * @wait: If true, wait (atomically) until function has completed
696  *        on other CPUs.
697  *
698  * If @wait is true, then returns once @func has returned.
699  *
700  * You must not call this function with disabled interrupts or
701  * from a hardware interrupt handler or from a bottom half handler.
702  */
703 void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
704  void *info, bool wait)
705 {
706  int cpu = get_cpu();
707 
708  smp_call_function_many(mask, func, info, wait);
709  if (cpumask_test_cpu(cpu, mask)) {
710  local_irq_disable();
711  func(info);
712  local_irq_enable();
713  }
714  put_cpu();
715 }
716 EXPORT_SYMBOL(on_each_cpu_mask);
717 
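A hedged usage sketch: run a callback on all CPUs of one NUMA node, including the local CPU if it belongs to that node. invalidate_node_tlb() is hypothetical, and cpumask_of_node() comes from <linux/topology.h>.

static void invalidate_node_tlb(void *info);	/* hypothetical callback */

static void invalidate_node(int node)
{
	on_each_cpu_mask(cpumask_of_node(node), invalidate_node_tlb, NULL, true);
}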
718 /*
719  * on_each_cpu_cond(): Call a function on each processor for which
720  * the supplied function cond_func returns true, optionally waiting
721  * for all the required CPUs to finish. This may include the local
722  * processor.
723  * @cond_func: A callback function that is passed a cpu id and
724  * the info parameter. The function is called
725  * with preemption disabled. The function should
726  * return a boolean value indicating whether to IPI
727  * the specified CPU.
728  * @func: The function to run on all applicable CPUs.
729  * This must be fast and non-blocking.
730  * @info: An arbitrary pointer to pass to both functions.
731  * @wait: If true, wait (atomically) until function has
732  * completed on other CPUs.
733  * @gfp_flags: GFP flags to use when allocating the cpumask
734  * used internally by the function.
735  *
736  * The function might sleep if the GFP flags indicate a non
737  * atomic allocation is allowed.
738  *
739  * Preemption is disabled to protect against CPUs going offline but not online.
740  * CPUs going online during the call will not be seen or sent an IPI.
741  *
742  * You must not call this function with disabled interrupts or
743  * from a hardware interrupt handler or from a bottom half handler.
744  */
745 void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
746  smp_call_func_t func, void *info, bool wait,
747  gfp_t gfp_flags)
748 {
749  cpumask_var_t cpus;
750  int cpu, ret;
751 
752  might_sleep_if(gfp_flags & __GFP_WAIT);
753 
754  if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
755  preempt_disable();
756  for_each_online_cpu(cpu)
757  if (cond_func(cpu, info))
758  cpumask_set_cpu(cpu, cpus);
759  on_each_cpu_mask(cpus, func, info, wait);
760  preempt_enable();
761  free_cpumask_var(cpus);
762  } else {
763  /*
764  * No free cpumask, bother. No matter, we'll
765  * just have to IPI them one by one.
766  */
767  preempt_disable();
768  for_each_online_cpu(cpu)
769  if (cond_func(cpu, info)) {
770  ret = smp_call_function_single(cpu, func,
771  info, wait);
772  WARN_ON_ONCE(!ret);
773  }
774  preempt_enable();
775  }
776 }
777 EXPORT_SYMBOL(on_each_cpu_cond);
778 
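A hedged usage sketch: only CPUs whose per-cpu flag is set get an IPI, the kind of selective flush this helper is meant for. The per-cpu flag and flush_local() are hypothetical.

static DEFINE_PER_CPU(bool, needs_flush);	/* hypothetical per-cpu flag */

static void flush_local(void *info);		/* hypothetical callback */

static bool cpu_needs_flush(int cpu, void *info)
{
	return per_cpu(needs_flush, cpu);
}

static void flush_where_needed(void)
{
	/* GFP_KERNEL: may sleep while allocating the internal cpumask */
	on_each_cpu_cond(cpu_needs_flush, flush_local, NULL, true, GFP_KERNEL);
}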
779 static void do_nothing(void *unused)
780 {
781 }
782 
783 /**
784  * kick_all_cpus_sync - Force all cpus out of idle
785  *
786  * Used to synchronize the update of pm_idle function pointer. It's
787  * called after the pointer is updated and returns after the dummy
788  * callback function has been executed on all cpus. The execution of
789  * the function can only happen on the remote cpus after they have
790  * left the idle function which had been called via pm_idle function
791  * pointer. So it's guaranteed that nothing interferes with the
792  * smp_call_function call.
793  */
794 void kick_all_cpus_sync(void)
795 {
796  /* Make sure the change is visible before we kick the cpus */
797  smp_mb();
798  smp_call_function(do_nothing, NULL, 1);
799 }
800 EXPORT_SYMBOL_GPL(kick_all_cpus_sync);
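A hedged usage sketch of kick_all_cpus_sync(): after publishing a new function pointer that idle CPUs may still be executing through, the dummy IPI guarantees every CPU has left the old function before the caller proceeds. idle_hook and the wrapper are hypothetical.

static void (*idle_hook)(void);		/* hypothetical */

static void set_idle_hook(void (*new_hook)(void))
{
	idle_hook = new_hook;	/* publish the new pointer */
	kick_all_cpus_sync();	/* no CPU can still be inside the old hook */
}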