Linux Kernel 3.7.1
smp.c
/*
 * Xen SMP support
 *
 * This file implements the Xen versions of smp_ops. SMP under Xen is
 * very straightforward. Bringing a CPU up is simply a matter of
 * loading its initial context and setting it running.
 *
 * IPIs are handled through the Xen event mechanism.
 *
 * Because virtual CPUs can be scheduled onto any real CPU, there's no
 * useful topology information for the kernel to make use of. As a
 * result, all CPUs are treated as if they're single-core and
 * single-threaded.
 */
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/irq_work.h>

#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cpu.h>

#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/events.h>

#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "mmu.h"

cpumask_var_t xen_cpu_initialized_map;

static DEFINE_PER_CPU(int, xen_resched_irq);
static DEFINE_PER_CPU(int, xen_callfunc_irq);
static DEFINE_PER_CPU(int, xen_callfuncsingle_irq);
static DEFINE_PER_CPU(int, xen_irq_work);
static DEFINE_PER_CPU(int, xen_debug_irq) = -1;

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id);

/*
 * Reschedule call back.
 */
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
{
        inc_irq_stat(irq_resched_count);
        scheduler_ipi();

        return IRQ_HANDLED;
}

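/*
 * Bring-up work that runs on the freshly started vcpu itself: set up
 * per-cpu state, record the (single-core) CPU data, mark the CPU
 * online and finally enable interrupts.
 */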
static void __cpuinit cpu_bringup(void)
{
        int cpu;

        cpu_init();
        touch_softlockup_watchdog();
        preempt_disable();

        xen_enable_sysenter();
        xen_enable_syscall();

        cpu = smp_processor_id();
        smp_store_cpu_info(cpu);
        cpu_data(cpu).x86_max_cores = 1;
        set_cpu_sibling_map(cpu);

        xen_setup_cpu_clockevents();

        notify_cpu_starting(cpu);

        set_cpu_online(cpu, true);

        this_cpu_write(cpu_state, CPU_ONLINE);

        wmb();

        /* We can take interrupts now: we're officially "up". */
        local_irq_enable();

        wmb();                  /* make sure everything is out */
}

static void __cpuinit cpu_bringup_and_idle(void)
{
        cpu_bringup();
        cpu_idle();
}

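/*
 * IPIs are delivered through Xen event channels rather than the APIC,
 * so each IPI type (reschedule, call-function, call-function-single,
 * irq_work) is bound to its own per-cpu event channel; the debug
 * interrupt is a VIRQ rather than an IPI. On any failure, everything
 * bound so far is unwound again.
 */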
static int xen_smp_intr_init(unsigned int cpu)
{
        int rc;
        const char *resched_name, *callfunc_name, *debug_name;

        resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
                                    cpu,
                                    xen_reschedule_interrupt,
                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
                                    resched_name,
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(xen_resched_irq, cpu) = rc;

        callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
                                    cpu,
                                    xen_call_function_interrupt,
                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
                                    callfunc_name,
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(xen_callfunc_irq, cpu) = rc;

        debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
        rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
                                     IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING,
                                     debug_name, NULL);
        if (rc < 0)
                goto fail;
        per_cpu(xen_debug_irq, cpu) = rc;

        callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
                                    cpu,
                                    xen_call_function_single_interrupt,
                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
                                    callfunc_name,
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(xen_callfuncsingle_irq, cpu) = rc;

        callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
        rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
                                    cpu,
                                    xen_irq_work_interrupt,
                                    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
                                    callfunc_name,
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(xen_irq_work, cpu) = rc;

        return 0;

 fail:
        if (per_cpu(xen_resched_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
        if (per_cpu(xen_callfunc_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
        if (per_cpu(xen_debug_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
        if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu),
                                       NULL);
        if (per_cpu(xen_irq_work, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);

        return rc;
}

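/*
 * For unprivileged (domU) guests the possible-CPU map is discovered by
 * asking the hypervisor about each vcpu id: a non-negative result from
 * VCPUOP_is_up means the vcpu is allocated to this domain, even if it
 * is currently offline.
 */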
static void __init xen_fill_possible_map(void)
{
        int i, rc;

        if (xen_initial_domain())
                return;

        for (i = 0; i < nr_cpu_ids; i++) {
                rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
                if (rc >= 0) {
                        num_processors++;
                        set_cpu_possible(i, true);
                }
        }
}

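/*
 * In the initial domain the firmware tables can advertise more CPUs
 * than dom0 actually has vcpus (e.g. with dom0_max_vcpus=X); rebuild
 * the possible/present maps from what the hypervisor reports.
 */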
static void __init xen_filter_cpu_maps(void)
{
        int i, rc;
        unsigned int subtract = 0;

        if (!xen_initial_domain())
                return;

        num_processors = 0;
        disabled_cpus = 0;
        for (i = 0; i < nr_cpu_ids; i++) {
                rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
                if (rc >= 0) {
                        num_processors++;
                        set_cpu_possible(i, true);
                } else {
                        set_cpu_possible(i, false);
                        set_cpu_present(i, false);
                        subtract++;
                }
        }
#ifdef CONFIG_HOTPLUG_CPU
        /* This is akin to using 'nr_cpus' on the Linux command line.
         * Which is OK as when we use 'dom0_max_vcpus=X' we can only
         * have up to X, while nr_cpu_ids is greater than X. This
         * normally is not a problem, except when CPU hotplugging
         * is involved and then there might be more than X CPUs
         * in the guest - which will not work as there is no
         * hypercall to expand the max number of VCPUs an already
         * running guest has. So cap it up to X. */
        if (subtract)
                nr_cpu_ids = nr_cpu_ids - subtract;
#endif

}

static void __init xen_smp_prepare_boot_cpu(void)
{
        BUG_ON(smp_processor_id() != 0);
        native_smp_prepare_boot_cpu();

        /* We've switched to the "real" per-cpu gdt, so make sure the
           old memory can be recycled */
        make_lowmem_page_readwrite(xen_initial_gdt);

        xen_filter_cpu_maps();
        xen_setup_vcpu_info_placement();
}

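/*
 * PV replacement for native_smp_prepare_cpus(): allocate the topology
 * cpumasks, bind the boot CPU's IPI event channels and trim the
 * possible map down to max_cpus.
 */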
static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
        unsigned cpu;
        unsigned int i;

        if (skip_ioapic_setup) {
                char *m = (max_cpus == 0) ?
                        "The nosmp parameter is incompatible with Xen; " \
                        "use Xen dom0_max_vcpus=1 parameter" :
                        "The noapic parameter is incompatible with Xen";

                xen_raw_printk(m);
                panic(m);
        }
        xen_init_lock_cpu(0);

        smp_store_cpu_info(0);
        cpu_data(0).x86_max_cores = 1;

        for_each_possible_cpu(i) {
                zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
                zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
                zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
        }
        set_cpu_sibling_map(0);

        if (xen_smp_intr_init(0))
                BUG();

        if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
                panic("could not allocate xen_cpu_initialized_map\n");

        cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));

        /* Restrict the possible_map according to max_cpus. */
        while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
                for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
                        continue;
                set_cpu_possible(cpu, false);
        }

        for_each_possible_cpu(cpu)
                set_cpu_present(cpu, true);
}

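/*
 * Build the initial register and descriptor state for a secondary
 * vcpu (segments, a read-only GDT frame, the cpu_bringup_and_idle
 * entry point, kernel stack and page tables) and register it with the
 * hypervisor via VCPUOP_initialise. This is done only once per vcpu;
 * xen_cpu_initialized_map records which ones are already set up.
 */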
static int __cpuinit
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
        struct vcpu_guest_context *ctxt;
        struct desc_struct *gdt;
        unsigned long gdt_mfn;

        if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
                return 0;

        ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
        if (ctxt == NULL)
                return -ENOMEM;

        gdt = get_cpu_gdt_table(cpu);

        ctxt->flags = VGCF_IN_KERNEL;
        ctxt->user_regs.ds = __USER_DS;
        ctxt->user_regs.es = __USER_DS;
        ctxt->user_regs.ss = __KERNEL_DS;
#ifdef CONFIG_X86_32
        ctxt->user_regs.fs = __KERNEL_PERCPU;
        ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#else
        ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
        ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
        ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */

        memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

        xen_copy_trap_info(ctxt->trap_ctxt);

        ctxt->ldt_ents = 0;

        BUG_ON((unsigned long)gdt & ~PAGE_MASK);

        gdt_mfn = arbitrary_virt_to_mfn(gdt);
        make_lowmem_page_readonly(gdt);
        make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));

        ctxt->gdt_frames[0] = gdt_mfn;
        ctxt->gdt_ents = GDT_ENTRIES;

        ctxt->user_regs.cs = __KERNEL_CS;
        ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);

        ctxt->kernel_ss = __KERNEL_DS;
        ctxt->kernel_sp = idle->thread.sp0;

#ifdef CONFIG_X86_32
        ctxt->event_callback_cs = __KERNEL_CS;
        ctxt->failsafe_callback_cs = __KERNEL_CS;
#endif
        ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
        ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;

        per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
        ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));

        if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
                BUG();

        kfree(ctxt);
        return 0;
}

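/*
 * smp_ops.cpu_up for PV guests: prepare the per-cpu data for the idle
 * task, initialise the vcpu context, bind its IPI event channels and
 * then unpause the vcpu with VCPUOP_up, waiting until cpu_bringup()
 * marks it CPU_ONLINE.
 */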
static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle)
{
        int rc;

        per_cpu(current_task, cpu) = idle;
#ifdef CONFIG_X86_32
        irq_ctx_init(cpu);
#else
        clear_tsk_thread_flag(idle, TIF_FORK);
        per_cpu(kernel_stack, cpu) =
                (unsigned long)task_stack_page(idle) -
                KERNEL_STACK_OFFSET + THREAD_SIZE;
#endif
        xen_setup_runstate_info(cpu);
        xen_setup_timer(cpu);
        xen_init_lock_cpu(cpu);

        per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;

        /* make sure interrupts start blocked */
        per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;

        rc = cpu_initialize_context(cpu, idle);
        if (rc)
                return rc;

        if (num_online_cpus() == 1)
                /* Just in case we booted with a single CPU. */
                alternatives_enable_smp();

        rc = xen_smp_intr_init(cpu);
        if (rc)
                return rc;

        rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
        BUG_ON(rc);

        while (per_cpu(cpu_state, cpu) != CPU_ONLINE) {
                HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
                barrier();
        }

        return 0;
}

static void xen_smp_cpus_done(unsigned int max_cpus)
{
}

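/*
 * CPU hotplug: a dying vcpu takes itself offline with VCPUOP_down in
 * xen_play_dead(); xen_cpu_die() runs on another CPU, waits until
 * VCPUOP_is_up reports the vcpu gone and then releases its event
 * channels and timer.
 */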
#ifdef CONFIG_HOTPLUG_CPU
static int xen_cpu_disable(void)
{
        unsigned int cpu = smp_processor_id();
        if (cpu == 0)
                return -EBUSY;

        cpu_disable_common();

        load_cr3(swapper_pg_dir);
        return 0;
}

static void xen_cpu_die(unsigned int cpu)
{
        while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
                current->state = TASK_UNINTERRUPTIBLE;
                schedule_timeout(HZ/10);
        }
        unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
        unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
        unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
        unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
        unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
        xen_uninit_lock_cpu(cpu);
        xen_teardown_timer(cpu);
}

static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
{
        play_dead_common();
        HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
        cpu_bringup();
        /*
         * Balance out the preempt calls - as we are running in cpu_idle
         * loop which has been called at bootup from cpu_bringup_and_idle.
         * cpu_bringup_and_idle() called cpu_bringup(), which did a
         * preempt_disable(), so this preempt_enable() balances it out.
         */
        preempt_enable();
}

#else /* !CONFIG_HOTPLUG_CPU */
static int xen_cpu_disable(void)
{
        return -ENOSYS;
}

static void xen_cpu_die(unsigned int cpu)
{
        BUG();
}

static void xen_play_dead(void)
{
        BUG();
}

#endif
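/*
 * Shutdown path: every other CPU runs stop_self() from an IPI and
 * pulls its own vcpu down with VCPUOP_down.
 */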
static void stop_self(void *v)
{
        int cpu = smp_processor_id();

        /* make sure we're not pinning something down */
        load_cr3(swapper_pg_dir);
        /* should set up a minimal gdt */

        set_cpu_online(cpu, false);

        HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
        BUG();
}

static void xen_stop_other_cpus(int wait)
{
        smp_call_function(stop_self, NULL, wait);
}

static void xen_smp_send_reschedule(int cpu)
{
        xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}

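/*
 * Sending an "IPI" is just a notification on the target CPU's event
 * channel that was bound in xen_smp_intr_init(); xen_send_IPI_one()
 * does the actual event-channel send.
 */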
static void __xen_send_IPI_mask(const struct cpumask *mask,
                                int vector)
{
        unsigned cpu;

        for_each_cpu_and(cpu, mask, cpu_online_mask)
                xen_send_IPI_one(cpu, vector);
}

static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
{
        int cpu;

        __xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);

        /* Make sure other vcpus get a chance to run if they need to. */
        for_each_cpu(cpu, mask) {
                if (xen_vcpu_stolen(cpu)) {
                        HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
                        break;
                }
        }
}

static void xen_smp_send_call_function_single_ipi(int cpu)
{
        __xen_send_IPI_mask(cpumask_of(cpu),
                            XEN_CALL_FUNCTION_SINGLE_VECTOR);
}

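/*
 * Translate the native x86 IPI vector numbers used by generic code
 * into the XEN_*_VECTOR values used by the event-channel IPI code
 * above.
 */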
static inline int xen_map_vector(int vector)
{
        int xen_vector;

        switch (vector) {
        case RESCHEDULE_VECTOR:
                xen_vector = XEN_RESCHEDULE_VECTOR;
                break;
        case CALL_FUNCTION_VECTOR:
                xen_vector = XEN_CALL_FUNCTION_VECTOR;
                break;
        case CALL_FUNCTION_SINGLE_VECTOR:
                xen_vector = XEN_CALL_FUNCTION_SINGLE_VECTOR;
                break;
        case IRQ_WORK_VECTOR:
                xen_vector = XEN_IRQ_WORK_VECTOR;
                break;
        default:
                xen_vector = -1;
                printk(KERN_ERR "xen: vector 0x%x is not implemented\n",
                        vector);
        }

        return xen_vector;
}

void xen_send_IPI_mask(const struct cpumask *mask,
                       int vector)
{
        int xen_vector = xen_map_vector(vector);

        if (xen_vector >= 0)
                __xen_send_IPI_mask(mask, xen_vector);
}

void xen_send_IPI_all(int vector)
{
        int xen_vector = xen_map_vector(vector);

        if (xen_vector >= 0)
                __xen_send_IPI_mask(cpu_online_mask, xen_vector);
}

void xen_send_IPI_self(int vector)
{
        int xen_vector = xen_map_vector(vector);

        if (xen_vector >= 0)
                xen_send_IPI_one(smp_processor_id(), xen_vector);
}

void xen_send_IPI_mask_allbutself(const struct cpumask *mask,
                                  int vector)
{
        unsigned cpu;
        unsigned int this_cpu = smp_processor_id();

        if (!(num_online_cpus() > 1))
                return;

        for_each_cpu_and(cpu, mask, cpu_online_mask) {
                if (this_cpu == cpu)
                        continue;

                xen_smp_send_call_function_single_ipi(cpu);
        }
}

void xen_send_IPI_allbutself(int vector)
{
        int xen_vector = xen_map_vector(vector);

        if (xen_vector >= 0)
                xen_send_IPI_mask_allbutself(cpu_online_mask, xen_vector);
}

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
        irq_enter();
        generic_smp_call_function_interrupt();
        inc_irq_stat(irq_call_count);
        irq_exit();

        return IRQ_HANDLED;
}

static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
{
        irq_enter();
        generic_smp_call_function_single_interrupt();
        inc_irq_stat(irq_call_count);
        irq_exit();

        return IRQ_HANDLED;
}

static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
{
        irq_enter();
        irq_work_run();
        inc_irq_stat(apic_irq_work_irqs);
        irq_exit();

        return IRQ_HANDLED;
}

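/* smp_ops used when running as a paravirtualized (PV) guest. */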
static const struct smp_ops xen_smp_ops __initconst = {
        .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
        .smp_prepare_cpus = xen_smp_prepare_cpus,
        .smp_cpus_done = xen_smp_cpus_done,

        .cpu_up = xen_cpu_up,
        .cpu_die = xen_cpu_die,
        .cpu_disable = xen_cpu_disable,
        .play_dead = xen_play_dead,

        .stop_other_cpus = xen_stop_other_cpus,
        .smp_send_reschedule = xen_smp_send_reschedule,

        .send_call_func_ipi = xen_smp_send_call_function_ipi,
        .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
};

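/*
 * Called during PV setup: install the Xen smp_ops, discover which
 * vcpus this domain has and switch to the Xen spinlock implementation.
 */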
void __init xen_smp_init(void)
{
        smp_ops = xen_smp_ops;
        xen_fill_possible_map();
        xen_init_spinlocks();
}

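/*
 * PVHVM guests boot their CPUs with the native SMP code, but when a
 * vector callback is available IPIs still go through event channels,
 * so only a subset of smp_ops is overridden below.
 */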
static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
{
        native_smp_prepare_cpus(max_cpus);
        WARN_ON(xen_smp_intr_init(0));

        xen_init_lock_cpu(0);
}

static int __cpuinit xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
        int rc;
        rc = native_cpu_up(cpu, tidle);
        WARN_ON(xen_smp_intr_init(cpu));
        return rc;
}

static void xen_hvm_cpu_die(unsigned int cpu)
{
        unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
        unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
        unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
        unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
        unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
        native_cpu_die(cpu);
}

void __init xen_hvm_smp_init(void)
{
        if (!xen_have_vector_callback)
                return;
        smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
        smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
        smp_ops.cpu_up = xen_hvm_cpu_up;
        smp_ops.cpu_die = xen_hvm_cpu_die;
        smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
        smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
}