Linux Kernel  3.7.1
svm.c
1 /*
2  * Kernel-based Virtual Machine driver for Linux
3  *
4  * AMD SVM support
5  *
6  * Copyright (C) 2006 Qumranet, Inc.
7  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
8  *
9  * Authors:
10  * Yaniv Kamay <[email protected]>
11  * Avi Kivity <[email protected]>
12  *
13  * This work is licensed under the terms of the GNU GPL, version 2. See
14  * the COPYING file in the top-level directory.
15  *
16  */
17 #include <linux/kvm_host.h>
18 
19 #include "irq.h"
20 #include "mmu.h"
21 #include "kvm_cache_regs.h"
22 #include "x86.h"
23 
24 #include <linux/module.h>
25 #include <linux/mod_devicetable.h>
26 #include <linux/kernel.h>
27 #include <linux/vmalloc.h>
28 #include <linux/highmem.h>
29 #include <linux/sched.h>
30 #include <linux/ftrace_event.h>
31 #include <linux/slab.h>
32 
33 #include <asm/perf_event.h>
34 #include <asm/tlbflush.h>
35 #include <asm/desc.h>
36 #include <asm/kvm_para.h>
37 
38 #include <asm/virtext.h>
39 #include "trace.h"
40 
41 #define __ex(x) __kvm_handle_fault_on_reboot(x)
42 
43 MODULE_AUTHOR("Qumranet");
44 MODULE_LICENSE("GPL");
45 
46 static const struct x86_cpu_id svm_cpu_id[] = {
47  X86_FEATURE_MATCH(X86_FEATURE_SVM),
48  {}
49 };
50 MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
51 
52 #define IOPM_ALLOC_ORDER 2
53 #define MSRPM_ALLOC_ORDER 1
54 
55 #define SEG_TYPE_LDT 2
56 #define SEG_TYPE_BUSY_TSS16 3
57 
58 #define SVM_FEATURE_NPT (1 << 0)
59 #define SVM_FEATURE_LBRV (1 << 1)
60 #define SVM_FEATURE_SVML (1 << 2)
61 #define SVM_FEATURE_NRIP (1 << 3)
62 #define SVM_FEATURE_TSC_RATE (1 << 4)
63 #define SVM_FEATURE_VMCB_CLEAN (1 << 5)
64 #define SVM_FEATURE_FLUSH_ASID (1 << 6)
65 #define SVM_FEATURE_DECODE_ASSIST (1 << 7)
66 #define SVM_FEATURE_PAUSE_FILTER (1 << 10)
67 
68 #define NESTED_EXIT_HOST 0 /* Exit handled on host level */
69 #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
70 #define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
71 
72 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
73 
74 #define TSC_RATIO_RSVD 0xffffff0000000000ULL
75 #define TSC_RATIO_MIN 0x0000000000000001ULL
76 #define TSC_RATIO_MAX 0x000000ffffffffffULL
77 
78 static bool erratum_383_found __read_mostly;
79 
80 static const u32 host_save_user_msrs[] = {
81 #ifdef CONFIG_X86_64
84 #endif
86 };
87 
88 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
89 
90 struct kvm_vcpu;
91 
92 struct nested_state {
93  struct vmcb *hsave;
94  u64 hsave_msr;
95  u64 vm_cr_msr;
96  u64 vmcb;
97 
98  /* These are the merged vectors */
99  u32 *msrpm;
100 
101  /* gpa pointers to the real vectors */
102  u64 vmcb_msrpm;
103  u64 vmcb_iopm;
104 
105  /* A VMEXIT is required but not yet emulated */
106  bool exit_required;
107 
108  /* cache for intercepts of the guest */
109  u32 intercept_cr;
110  u32 intercept_dr;
111  u32 intercept_exceptions;
112  u64 intercept;
113 
114  /* Nested Paging related state */
115  u64 nested_cr3;
116 };
117 
118 #define MSRPM_OFFSETS 16
119 static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
120 
121 /*
122  * Set osvw_len to higher value when updated Revision Guides
123  * are published and we know what the new status bits are
124  */
125 static uint64_t osvw_len = 4, osvw_status;
126 
127 struct vcpu_svm {
128  struct kvm_vcpu vcpu;
129  struct vmcb *vmcb;
130  unsigned long vmcb_pa;
131  struct svm_cpu_data *svm_data;
132  uint64_t asid_generation;
133  uint64_t sysenter_esp;
134  uint64_t sysenter_eip;
135 
136  u64 next_rip;
137 
138  u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
139  struct {
140  u16 fs;
141  u16 gs;
142  u16 ldt;
143  u64 gs_base;
144  } host;
145 
146  u32 *msrpm;
147 
148  ulong nmi_iret_rip;
149 
150  struct nested_state nested;
151 
152  bool nmi_singlestep;
153 
154  unsigned int3_injected;
155  unsigned long int3_rip;
156  u32 apf_reason;
157 
158  u64 tsc_ratio;
159 };
160 
161 static DEFINE_PER_CPU(u64, current_tsc_ratio);
162 #define TSC_RATIO_DEFAULT 0x0100000000ULL
163 
164 #define MSR_INVALID 0xffffffffU
165 
166 static const struct svm_direct_access_msrs {
167  u32 index; /* Index of the MSR */
168  bool always; /* True if intercept is always on */
169 } direct_access_msrs[] = {
170  { .index = MSR_STAR, .always = true },
171  { .index = MSR_IA32_SYSENTER_CS, .always = true },
172 #ifdef CONFIG_X86_64
173  { .index = MSR_GS_BASE, .always = true },
174  { .index = MSR_FS_BASE, .always = true },
175  { .index = MSR_KERNEL_GS_BASE, .always = true },
176  { .index = MSR_LSTAR, .always = true },
177  { .index = MSR_CSTAR, .always = true },
178  { .index = MSR_SYSCALL_MASK, .always = true },
179 #endif
180  { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
181  { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
182  { .index = MSR_IA32_LASTINTFROMIP, .always = false },
183  { .index = MSR_IA32_LASTINTTOIP, .always = false },
184  { .index = MSR_INVALID, .always = false },
185 };
186 
187 /* enable NPT for AMD64 and X86 with PAE */
188 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
189 static bool npt_enabled = true;
190 #else
191 static bool npt_enabled;
192 #endif
193 
194 /* allow nested paging (virtualized MMU) for all guests */
195 static int npt = true;
196 module_param(npt, int, S_IRUGO);
197 
198 /* allow nested virtualization in KVM/SVM */
199 static int nested = true;
200 module_param(nested, int, S_IRUGO);
201 
202 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
203 static void svm_complete_interrupts(struct vcpu_svm *svm);
204 
205 static int nested_svm_exit_handled(struct vcpu_svm *svm);
206 static int nested_svm_intercept(struct vcpu_svm *svm);
207 static int nested_svm_vmexit(struct vcpu_svm *svm);
208 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
209  bool has_error_code, u32 error_code);
210 static u64 __scale_tsc(u64 ratio, u64 tsc);
211 
212 enum {
213  VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
214  pause filter count */
215  VMCB_PERM_MAP, /* IOPM Base and MSRPM Base */
216  VMCB_ASID, /* ASID */
217  VMCB_INTR, /* int_ctl, int_vector */
218  VMCB_NPT, /* npt_en, nCR3, gPAT */
219  VMCB_CR, /* CR0, CR3, CR4, EFER */
220  VMCB_DR, /* DR6, DR7 */
221  VMCB_DT, /* GDT, IDT */
222  VMCB_SEG, /* CS, DS, SS, ES, CPL */
223  VMCB_CR2, /* CR2 only */
224  VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
225  VMCB_DIRTY_MAX,
226 };
227 
228 /* TPR and CR2 are always written before VMRUN */
229 #define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
230 
231 static inline void mark_all_dirty(struct vmcb *vmcb)
232 {
233  vmcb->control.clean = 0;
234 }
235 
236 static inline void mark_all_clean(struct vmcb *vmcb)
237 {
238  vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
239  & ~VMCB_ALWAYS_DIRTY_MASK;
240 }
241 
242 static inline void mark_dirty(struct vmcb *vmcb, int bit)
243 {
244  vmcb->control.clean &= ~(1 << bit);
245 }
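/*
 * A short note on the clean-bit protocol above: each VMCB_* enum value
 * guards one group of VMCB fields. For example, after svm_set_cr0()
 * updates save->cr0 it calls mark_dirty(svm->vmcb, VMCB_CR), clearing
 * that bit in control.clean so the CPU reloads the CR group on the next
 * VMRUN; mark_all_clean() then re-arms caching for everything except
 * VMCB_INTR and VMCB_CR2, which are rewritten before every VMRUN anyway.
 */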
246 
247 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
248 {
249  return container_of(vcpu, struct vcpu_svm, vcpu);
250 }
251 
252 static void recalc_intercepts(struct vcpu_svm *svm)
253 {
254  struct vmcb_control_area *c, *h;
255  struct nested_state *g;
256 
257  mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
258 
259  if (!is_guest_mode(&svm->vcpu))
260  return;
261 
262  c = &svm->vmcb->control;
263  h = &svm->nested.hsave->control;
264  g = &svm->nested;
265 
266  c->intercept_cr = h->intercept_cr | g->intercept_cr;
267  c->intercept_dr = h->intercept_dr | g->intercept_dr;
268  c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
269  c->intercept = h->intercept | g->intercept;
270 }
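/*
 * Example of the merge above: while a nested guest runs, a CR0 write must
 * trap if either the host VMCB (hsave) or the L1 hypervisor asked for it,
 * so the active intercept bits are simply the OR of both sets. Dropping
 * an intercept therefore only takes effect once neither side requests it.
 */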
271 
272 static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
273 {
274  if (is_guest_mode(&svm->vcpu))
275  return svm->nested.hsave;
276  else
277  return svm->vmcb;
278 }
279 
280 static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
281 {
282  struct vmcb *vmcb = get_host_vmcb(svm);
283 
284  vmcb->control.intercept_cr |= (1U << bit);
285 
286  recalc_intercepts(svm);
287 }
288 
289 static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
290 {
291  struct vmcb *vmcb = get_host_vmcb(svm);
292 
293  vmcb->control.intercept_cr &= ~(1U << bit);
294 
295  recalc_intercepts(svm);
296 }
297 
298 static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
299 {
300  struct vmcb *vmcb = get_host_vmcb(svm);
301 
302  return vmcb->control.intercept_cr & (1U << bit);
303 }
304 
305 static inline void set_dr_intercept(struct vcpu_svm *svm, int bit)
306 {
307  struct vmcb *vmcb = get_host_vmcb(svm);
308 
309  vmcb->control.intercept_dr |= (1U << bit);
310 
311  recalc_intercepts(svm);
312 }
313 
314 static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit)
315 {
316  struct vmcb *vmcb = get_host_vmcb(svm);
317 
318  vmcb->control.intercept_dr &= ~(1U << bit);
319 
320  recalc_intercepts(svm);
321 }
322 
323 static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
324 {
325  struct vmcb *vmcb = get_host_vmcb(svm);
326 
327  vmcb->control.intercept_exceptions |= (1U << bit);
328 
329  recalc_intercepts(svm);
330 }
331 
332 static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
333 {
334  struct vmcb *vmcb = get_host_vmcb(svm);
335 
336  vmcb->control.intercept_exceptions &= ~(1U << bit);
337 
338  recalc_intercepts(svm);
339 }
340 
341 static inline void set_intercept(struct vcpu_svm *svm, int bit)
342 {
343  struct vmcb *vmcb = get_host_vmcb(svm);
344 
345  vmcb->control.intercept |= (1ULL << bit);
346 
347  recalc_intercepts(svm);
348 }
349 
350 static inline void clr_intercept(struct vcpu_svm *svm, int bit)
351 {
352  struct vmcb *vmcb = get_host_vmcb(svm);
353 
354  vmcb->control.intercept &= ~(1ULL << bit);
355 
356  recalc_intercepts(svm);
357 }
358 
359 static inline void enable_gif(struct vcpu_svm *svm)
360 {
361  svm->vcpu.arch.hflags |= HF_GIF_MASK;
362 }
363 
364 static inline void disable_gif(struct vcpu_svm *svm)
365 {
366  svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
367 }
368 
369 static inline bool gif_set(struct vcpu_svm *svm)
370 {
371  return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
372 }
373 
374 static unsigned long iopm_base;
375 
376 struct kvm_ldttss_desc {
377  u16 limit0;
378  u16 base0;
379  unsigned base1:8, type:5, dpl:2, p:1;
380  unsigned limit1:4, zero0:3, g:1, base2:8;
381  u32 base3;
382  u32 zero1;
383 } __attribute__((packed));
384 
385 struct svm_cpu_data {
386  int cpu;
387 
388  u64 asid_generation;
389  u32 max_asid;
390  u32 next_asid;
391  struct kvm_ldttss_desc *tss_desc;
392 
393  struct page *save_area;
394 };
395 
396 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
397 
399  int cpu;
400  int r;
401 };
402 
403 static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
404 
405 #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
406 #define MSRS_RANGE_SIZE 2048
407 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
408 
409 static u32 svm_msrpm_offset(u32 msr)
410 {
411  u32 offset;
412  int i;
413 
414  for (i = 0; i < NUM_MSR_MAPS; i++) {
415  if (msr < msrpm_ranges[i] ||
416  msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
417  continue;
418 
419  offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */
420  offset += (i * MSRS_RANGE_SIZE); /* add range offset */
421 
422  /* Now we have the u8 offset - but need the u32 offset */
423  return offset / 4;
424  }
425 
426  /* MSR not in any range */
427  return MSR_INVALID;
428 }
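/*
 * Worked example for the calculation above: MSR_STAR (0xc0000081) falls
 * into the second range (base 0xc0000000), so the byte offset is
 * 0x81 / 4 + 1 * 2048 = 32 + 2048 = 2080, and the returned u32 offset is
 * 2080 / 4 = 520. Each MSR occupies two permission bits (read + write),
 * i.e. four MSRs per byte and sixteen per u32 of the MSR permission map.
 */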
429 
430 #define MAX_INST_SIZE 15
431 
432 static inline void clgi(void)
433 {
434  asm volatile (__ex(SVM_CLGI));
435 }
436 
437 static inline void stgi(void)
438 {
439  asm volatile (__ex(SVM_STGI));
440 }
441 
442 static inline void invlpga(unsigned long addr, u32 asid)
443 {
444  asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
445 }
446 
447 static int get_npt_level(void)
448 {
449 #ifdef CONFIG_X86_64
450  return PT64_ROOT_LEVEL;
451 #else
452  return PT32E_ROOT_LEVEL;
453 #endif
454 }
455 
456 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
457 {
458  vcpu->arch.efer = efer;
459  if (!npt_enabled && !(efer & EFER_LMA))
460  efer &= ~EFER_LME;
461 
462  to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
463  mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
464 }
465 
466 static int is_external_interrupt(u32 info)
467 {
468  info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
469  return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
470 }
471 
472 static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
473 {
474  struct vcpu_svm *svm = to_svm(vcpu);
475  u32 ret = 0;
476 
477  if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
478  ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
479  return ret & mask;
480 }
481 
482 static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
483 {
484  struct vcpu_svm *svm = to_svm(vcpu);
485 
486  if (mask == 0)
487  svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
488  else
489  svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
490 
491 }
492 
493 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
494 {
495  struct vcpu_svm *svm = to_svm(vcpu);
496 
497  if (svm->vmcb->control.next_rip != 0)
498  svm->next_rip = svm->vmcb->control.next_rip;
499 
500  if (!svm->next_rip) {
501  if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
502  EMULATE_DONE)
503  printk(KERN_DEBUG "%s: NOP\n", __func__);
504  return;
505  }
506  if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
507  printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
508  __func__, kvm_rip_read(vcpu), svm->next_rip);
509 
510  kvm_rip_write(vcpu, svm->next_rip);
511  svm_set_interrupt_shadow(vcpu, 0);
512 }
513 
514 static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
515  bool has_error_code, u32 error_code,
516  bool reinject)
517 {
518  struct vcpu_svm *svm = to_svm(vcpu);
519 
520  /*
521  * If we are within a nested VM we'd better #VMEXIT and let the guest
522  * handle the exception
523  */
524  if (!reinject &&
525  nested_svm_check_exception(svm, nr, has_error_code, error_code))
526  return;
527 
528  if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
529  unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
530 
531  /*
532  * For guest debugging where we have to reinject #BP if some
533  * INT3 is guest-owned:
534  * Emulate nRIP by moving RIP forward. Will fail if injection
535  * raises a fault that is not intercepted. Still better than
536  * failing in all cases.
537  */
538  skip_emulated_instruction(&svm->vcpu);
539  rip = kvm_rip_read(&svm->vcpu);
540  svm->int3_rip = rip + svm->vmcb->save.cs.base;
541  svm->int3_injected = rip - old_rip;
542  }
543 
544  svm->vmcb->control.event_inj = nr
545  | SVM_EVTINJ_VALID
546  | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
547  | SVM_EVTINJ_TYPE_EXEPT;
548  svm->vmcb->control.event_inj_err = error_code;
549 }
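/*
 * For illustration: queueing #GP (vector 13) with an error code results in
 * event_inj = 13 | SVM_EVTINJ_TYPE_EXEPT | SVM_EVTINJ_VALID_ERR |
 * SVM_EVTINJ_VALID, with event_inj_err holding the error code; the CPU
 * injects the exception on the next VMRUN. The nRIP emulation above is
 * only needed for #BP on CPUs without the NRIPS feature.
 */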
550 
551 static void svm_init_erratum_383(void)
552 {
553  u32 low, high;
554  int err;
555  u64 val;
556 
557  if (!cpu_has_amd_erratum(amd_erratum_383))
558  return;
559 
560  /* Use _safe variants to not break nested virtualization */
561  val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
562  if (err)
563  return;
564 
565  val |= (1ULL << 47);
566 
567  low = lower_32_bits(val);
568  high = upper_32_bits(val);
569 
570  native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
571 
572  erratum_383_found = true;
573 }
574 
575 static void svm_init_osvw(struct kvm_vcpu *vcpu)
576 {
577  /*
578  * Guests should see errata 400 and 415 as fixed (assuming that
579  * HLT and IO instructions are intercepted).
580  */
581  vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
582  vcpu->arch.osvw.status = osvw_status & ~(6ULL);
583 
584  /*
585  * By increasing VCPU's osvw.length to 3 we are telling the guest that
586  * all osvw.status bits inside that length, including bit 0 (which is
587  * reserved for erratum 298), are valid. However, if host processor's
588  * osvw_len is 0 then osvw_status[0] carries no information. We need to
589  * be conservative here and therefore we tell the guest that erratum 298
590  * is present (because we really don't know).
591  */
592  if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
593  vcpu->arch.osvw.status |= 1;
594 }
595 
596 static int has_svm(void)
597 {
598  const char *msg;
599 
600  if (!cpu_has_svm(&msg)) {
601  printk(KERN_INFO "has_svm: %s\n", msg);
602  return 0;
603  }
604 
605  return 1;
606 }
607 
608 static void svm_hardware_disable(void *garbage)
609 {
610  /* Make sure we clean up behind us */
611  if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
612  wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
613 
614  cpu_svm_disable();
615 
617 }
618 
619 static int svm_hardware_enable(void *garbage)
620 {
621 
622  struct svm_cpu_data *sd;
623  uint64_t efer;
624  struct desc_ptr gdt_descr;
625  struct desc_struct *gdt;
626  int me = raw_smp_processor_id();
627 
628  rdmsrl(MSR_EFER, efer);
629  if (efer & EFER_SVME)
630  return -EBUSY;
631 
632  if (!has_svm()) {
633  printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
634  me);
635  return -EINVAL;
636  }
637  sd = per_cpu(svm_data, me);
638 
639  if (!sd) {
640  printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
641  me);
642  return -EINVAL;
643  }
644 
645  sd->asid_generation = 1;
646  sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
647  sd->next_asid = sd->max_asid + 1;
648 
649  native_store_gdt(&gdt_descr);
650  gdt = (struct desc_struct *)gdt_descr.address;
651  sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
652 
653  wrmsrl(MSR_EFER, efer | EFER_SVME);
654 
655  wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
656 
657  if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
658  wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
659  __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
660  }
661 
662 
663  /*
664  * Get OSVW bits.
665  *
666  * Note that it is possible to have a system with mixed processor
667  * revisions and therefore different OSVW bits. If bits are not the same
668  * on different processors then choose the worst case (i.e. if erratum
669  * is present on one processor and not on another then assume that the
670  * erratum is present everywhere).
671  */
672  if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
673  uint64_t len, status = 0;
674  int err;
675 
676  len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
677  if (!err)
678  status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
679  &err);
680 
681  if (err)
682  osvw_status = osvw_len = 0;
683  else {
684  if (len < osvw_len)
685  osvw_len = len;
686  osvw_status |= status;
687  osvw_status &= (1ULL << osvw_len) - 1;
688  }
689  } else
690  osvw_status = osvw_len = 0;
691 
692  svm_init_erratum_383();
693 
695 
696  return 0;
697 }
698 
699 static void svm_cpu_uninit(int cpu)
700 {
701  struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
702 
703  if (!sd)
704  return;
705 
706  per_cpu(svm_data, raw_smp_processor_id()) = NULL;
707  __free_page(sd->save_area);
708  kfree(sd);
709 }
710 
711 static int svm_cpu_init(int cpu)
712 {
713  struct svm_cpu_data *sd;
714  int r;
715 
716  sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
717  if (!sd)
718  return -ENOMEM;
719  sd->cpu = cpu;
720  sd->save_area = alloc_page(GFP_KERNEL);
721  r = -ENOMEM;
722  if (!sd->save_area)
723  goto err_1;
724 
725  per_cpu(svm_data, cpu) = sd;
726 
727  return 0;
728 
729 err_1:
730  kfree(sd);
731  return r;
732 
733 }
734 
735 static bool valid_msr_intercept(u32 index)
736 {
737  int i;
738 
739  for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
740  if (direct_access_msrs[i].index == index)
741  return true;
742 
743  return false;
744 }
745 
746 static void set_msr_interception(u32 *msrpm, unsigned msr,
747  int read, int write)
748 {
749  u8 bit_read, bit_write;
750  unsigned long tmp;
751  u32 offset;
752 
753  /*
754  * If this warning triggers extend the direct_access_msrs list at the
755  * beginning of the file
756  */
757  WARN_ON(!valid_msr_intercept(msr));
758 
759  offset = svm_msrpm_offset(msr);
760  bit_read = 2 * (msr & 0x0f);
761  bit_write = 2 * (msr & 0x0f) + 1;
762  tmp = msrpm[offset];
763 
764  BUG_ON(offset == MSR_INVALID);
765 
766  read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp);
767  write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
768 
769  msrpm[offset] = tmp;
770 }
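/*
 * Example of the bit layout used above, taking MSR_STAR (0xc0000081):
 * msr & 0x0f = 1, so bit_read = 2 and bit_write = 3 inside the u32 that
 * svm_msrpm_offset() selected. A cleared bit means "do not intercept",
 * which is why passing read=1/write=1 clears both bits and lets the guest
 * access the MSR directly.
 */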
771 
772 static void svm_vcpu_init_msrpm(u32 *msrpm)
773 {
774  int i;
775 
776  memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
777 
778  for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
779  if (!direct_access_msrs[i].always)
780  continue;
781 
782  set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
783  }
784 }
785 
786 static void add_msr_offset(u32 offset)
787 {
788  int i;
789 
790  for (i = 0; i < MSRPM_OFFSETS; ++i) {
791 
792  /* Offset already in list? */
793  if (msrpm_offsets[i] == offset)
794  return;
795 
796  /* Slot used by another offset? */
797  if (msrpm_offsets[i] != MSR_INVALID)
798  continue;
799 
800  /* Add offset to list */
801  msrpm_offsets[i] = offset;
802 
803  return;
804  }
805 
806  /*
807  * If this BUG triggers the msrpm_offsets table has an overflow. Just
808  * increase MSRPM_OFFSETS in this case.
809  */
810  BUG();
811 }
812 
813 static void init_msrpm_offsets(void)
814 {
815  int i;
816 
817  memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
818 
819  for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
820  u32 offset;
821 
822  offset = svm_msrpm_offset(direct_access_msrs[i].index);
823  BUG_ON(offset == MSR_INVALID);
824 
825  add_msr_offset(offset);
826  }
827 }
828 
829 static void svm_enable_lbrv(struct vcpu_svm *svm)
830 {
831  u32 *msrpm = svm->msrpm;
832 
833  svm->vmcb->control.lbr_ctl = 1;
834  set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
835  set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
836  set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
837  set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
838 }
839 
840 static void svm_disable_lbrv(struct vcpu_svm *svm)
841 {
842  u32 *msrpm = svm->msrpm;
843 
844  svm->vmcb->control.lbr_ctl = 0;
845  set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
846  set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
847  set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
848  set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
849 }
850 
851 static __init int svm_hardware_setup(void)
852 {
853  int cpu;
854  struct page *iopm_pages;
855  void *iopm_va;
856  int r;
857 
858  iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
859 
860  if (!iopm_pages)
861  return -ENOMEM;
862 
863  iopm_va = page_address(iopm_pages);
864  memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
865  iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
866 
867  init_msrpm_offsets();
868 
869  if (boot_cpu_has(X86_FEATURE_NX))
870  kvm_enable_efer_bits(EFER_NX);
871 
872  if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
873  kvm_enable_efer_bits(EFER_FFXSR);
874 
875  if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
876  u64 max;
877 
878  kvm_has_tsc_control = true;
879 
880  /*
881  * Make sure the user can only configure tsc_khz values that
882  * fit into a signed integer.
883  * A min value is not needed because it will always
884  * be 1 on all machines and a value of 0 is used to disable
885  * tsc-scaling for the vcpu.
886  */
887  max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX));
888 
889  kvm_max_guest_tsc_khz = max;
890  }
891 
892  if (nested) {
893  printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
894  kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
895  }
896 
897  for_each_possible_cpu(cpu) {
898  r = svm_cpu_init(cpu);
899  if (r)
900  goto err;
901  }
902 
903  if (!boot_cpu_has(X86_FEATURE_NPT))
904  npt_enabled = false;
905 
906  if (npt_enabled && !npt) {
907  printk(KERN_INFO "kvm: Nested Paging disabled\n");
908  npt_enabled = false;
909  }
910 
911  if (npt_enabled) {
912  printk(KERN_INFO "kvm: Nested Paging enabled\n");
913  kvm_enable_tdp();
914  } else
915  kvm_disable_tdp();
916 
917  return 0;
918 
919 err:
920  __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
921  iopm_base = 0;
922  return r;
923 }
924 
925 static __exit void svm_hardware_unsetup(void)
926 {
927  int cpu;
928 
929  for_each_possible_cpu(cpu)
930  svm_cpu_uninit(cpu);
931 
932  __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
933  iopm_base = 0;
934 }
935 
936 static void init_seg(struct vmcb_seg *seg)
937 {
938  seg->selector = 0;
939  seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
940  SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
941  seg->limit = 0xffff;
942  seg->base = 0;
943 }
944 
945 static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
946 {
947  seg->selector = 0;
948  seg->attrib = SVM_SELECTOR_P_MASK | type;
949  seg->limit = 0xffff;
950  seg->base = 0;
951 }
952 
953 static u64 __scale_tsc(u64 ratio, u64 tsc)
954 {
955  u64 mult, frac, _tsc;
956 
957  mult = ratio >> 32;
958  frac = ratio & ((1ULL << 32) - 1);
959 
960  _tsc = tsc;
961  _tsc *= mult;
962  _tsc += (tsc >> 32) * frac;
963  _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32;
964 
965  return _tsc;
966 }
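/*
 * __scale_tsc() multiplies a 64-bit TSC value by an 8.32 fixed-point ratio
 * (integer part in bits 39:32, fraction in bits 31:0) without needing a
 * 128-bit intermediate. For example, with ratio = 0x0080000000 (i.e. 0.5),
 * mult = 0 and frac = 0x80000000, so the result is roughly tsc / 2; with
 * TSC_RATIO_DEFAULT (0x0100000000) the function returns tsc unchanged.
 */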
967 
968 static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
969 {
970  struct vcpu_svm *svm = to_svm(vcpu);
971  u64 _tsc = tsc;
972 
973  if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
974  _tsc = __scale_tsc(svm->tsc_ratio, tsc);
975 
976  return _tsc;
977 }
978 
979 static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
980 {
981  struct vcpu_svm *svm = to_svm(vcpu);
982  u64 ratio;
983  u64 khz;
984 
985  /* Guest TSC same frequency as host TSC? */
986  if (!scale) {
987  svm->tsc_ratio = TSC_RATIO_DEFAULT;
988  return;
989  }
990 
991  /* TSC scaling supported? */
992  if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
993  if (user_tsc_khz > tsc_khz) {
994  vcpu->arch.tsc_catchup = 1;
995  vcpu->arch.tsc_always_catchup = 1;
996  } else
997  WARN(1, "user requested TSC rate below hardware speed\n");
998  return;
999  }
1000 
1001  khz = user_tsc_khz;
1002 
1003  /* TSC scaling required - calculate ratio */
1004  ratio = khz << 32;
1005  do_div(ratio, tsc_khz);
1006 
1007  if (ratio == 0 || ratio & TSC_RATIO_RSVD) {
1008  WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n",
1009  user_tsc_khz);
1010  return;
1011  }
1012  svm->tsc_ratio = ratio;
1013 }
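/*
 * Example of the ratio computed above: a guest frequency of 1500000 kHz on
 * a 3000000 kHz host gives ratio = (1500000 << 32) / 3000000 =
 * 0x0080000000, i.e. 0.5 in the 8.32 fixed-point format consumed by
 * MSR_AMD64_TSC_RATIO via svm_vcpu_load() and __scale_tsc().
 */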
1014 
1015 static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1016 {
1017  struct vcpu_svm *svm = to_svm(vcpu);
1018  u64 g_tsc_offset = 0;
1019 
1020  if (is_guest_mode(vcpu)) {
1021  g_tsc_offset = svm->vmcb->control.tsc_offset -
1022  svm->nested.hsave->control.tsc_offset;
1023  svm->nested.hsave->control.tsc_offset = offset;
1024  }
1025 
1026  svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
1027 
1028  mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1029 }
1030 
1031 static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
1032 {
1033  struct vcpu_svm *svm = to_svm(vcpu);
1034 
1035  WARN_ON(adjustment < 0);
1036  if (host)
1037  adjustment = svm_scale_tsc(vcpu, adjustment);
1038 
1039  svm->vmcb->control.tsc_offset += adjustment;
1040  if (is_guest_mode(vcpu))
1041  svm->nested.hsave->control.tsc_offset += adjustment;
1042  mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1043 }
1044 
1045 static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
1046 {
1047  u64 tsc;
1048 
1049  tsc = svm_scale_tsc(vcpu, native_read_tsc());
1050 
1051  return target_tsc - tsc;
1052 }
1053 
1054 static void init_vmcb(struct vcpu_svm *svm)
1055 {
1056  struct vmcb_control_area *control = &svm->vmcb->control;
1057  struct vmcb_save_area *save = &svm->vmcb->save;
1058 
1059  svm->vcpu.fpu_active = 1;
1060  svm->vcpu.arch.hflags = 0;
1061 
1062  set_cr_intercept(svm, INTERCEPT_CR0_READ);
1063  set_cr_intercept(svm, INTERCEPT_CR3_READ);
1064  set_cr_intercept(svm, INTERCEPT_CR4_READ);
1065  set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1066  set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1067  set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
1068  set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
1069 
1070  set_dr_intercept(svm, INTERCEPT_DR0_READ);
1071  set_dr_intercept(svm, INTERCEPT_DR1_READ);
1072  set_dr_intercept(svm, INTERCEPT_DR2_READ);
1073  set_dr_intercept(svm, INTERCEPT_DR3_READ);
1074  set_dr_intercept(svm, INTERCEPT_DR4_READ);
1075  set_dr_intercept(svm, INTERCEPT_DR5_READ);
1076  set_dr_intercept(svm, INTERCEPT_DR6_READ);
1077  set_dr_intercept(svm, INTERCEPT_DR7_READ);
1078 
1079  set_dr_intercept(svm, INTERCEPT_DR0_WRITE);
1080  set_dr_intercept(svm, INTERCEPT_DR1_WRITE);
1081  set_dr_intercept(svm, INTERCEPT_DR2_WRITE);
1082  set_dr_intercept(svm, INTERCEPT_DR3_WRITE);
1083  set_dr_intercept(svm, INTERCEPT_DR4_WRITE);
1084  set_dr_intercept(svm, INTERCEPT_DR5_WRITE);
1085  set_dr_intercept(svm, INTERCEPT_DR6_WRITE);
1086  set_dr_intercept(svm, INTERCEPT_DR7_WRITE);
1087 
1088  set_exception_intercept(svm, PF_VECTOR);
1089  set_exception_intercept(svm, UD_VECTOR);
1090  set_exception_intercept(svm, MC_VECTOR);
1091 
1092  set_intercept(svm, INTERCEPT_INTR);
1093  set_intercept(svm, INTERCEPT_NMI);
1094  set_intercept(svm, INTERCEPT_SMI);
1095  set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
1096  set_intercept(svm, INTERCEPT_RDPMC);
1097  set_intercept(svm, INTERCEPT_CPUID);
1098  set_intercept(svm, INTERCEPT_INVD);
1099  set_intercept(svm, INTERCEPT_HLT);
1100  set_intercept(svm, INTERCEPT_INVLPG);
1101  set_intercept(svm, INTERCEPT_INVLPGA);
1102  set_intercept(svm, INTERCEPT_IOIO_PROT);
1103  set_intercept(svm, INTERCEPT_MSR_PROT);
1104  set_intercept(svm, INTERCEPT_TASK_SWITCH);
1105  set_intercept(svm, INTERCEPT_SHUTDOWN);
1106  set_intercept(svm, INTERCEPT_VMRUN);
1107  set_intercept(svm, INTERCEPT_VMMCALL);
1108  set_intercept(svm, INTERCEPT_VMLOAD);
1109  set_intercept(svm, INTERCEPT_VMSAVE);
1110  set_intercept(svm, INTERCEPT_STGI);
1111  set_intercept(svm, INTERCEPT_CLGI);
1112  set_intercept(svm, INTERCEPT_SKINIT);
1113  set_intercept(svm, INTERCEPT_WBINVD);
1114  set_intercept(svm, INTERCEPT_MONITOR);
1115  set_intercept(svm, INTERCEPT_MWAIT);
1116  set_intercept(svm, INTERCEPT_XSETBV);
1117 
1118  control->iopm_base_pa = iopm_base;
1119  control->msrpm_base_pa = __pa(svm->msrpm);
1120  control->int_ctl = V_INTR_MASKING_MASK;
1121 
1122  init_seg(&save->es);
1123  init_seg(&save->ss);
1124  init_seg(&save->ds);
1125  init_seg(&save->fs);
1126  init_seg(&save->gs);
1127 
1128  save->cs.selector = 0xf000;
1129  /* Executable/Readable Code Segment */
1130  save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
1131  SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
1132  save->cs.limit = 0xffff;
1133  /*
1134  * cs.base should really be 0xffff0000, but vmx can't handle that, so
1135  * be consistent with it.
1136  *
1137  * Replace when we have real mode working for vmx.
1138  */
1139  save->cs.base = 0xf0000;
1140 
1141  save->gdtr.limit = 0xffff;
1142  save->idtr.limit = 0xffff;
1143 
1144  init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
1145  init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
1146 
1147  svm_set_efer(&svm->vcpu, 0);
1148  save->dr6 = 0xffff0ff0;
1149  kvm_set_rflags(&svm->vcpu, 2);
1150  save->rip = 0x0000fff0;
1151  svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
1152 
1153  /*
1154  * This is the guest-visible cr0 value.
1155  * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
1156  */
1157  svm->vcpu.arch.cr0 = 0;
1158  (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
1159 
1160  save->cr4 = X86_CR4_PAE;
1161  /* rdx = ?? */
1162 
1163  if (npt_enabled) {
1164  /* Setup VMCB for Nested Paging */
1165  control->nested_ctl = 1;
1166  clr_intercept(svm, INTERCEPT_INVLPG);
1167  clr_exception_intercept(svm, PF_VECTOR);
1168  clr_cr_intercept(svm, INTERCEPT_CR3_READ);
1169  clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1170  save->g_pat = 0x0007040600070406ULL;
1171  save->cr3 = 0;
1172  save->cr4 = 0;
1173  }
1174  svm->asid_generation = 0;
1175 
1176  svm->nested.vmcb = 0;
1177  svm->vcpu.arch.hflags = 0;
1178 
1179  if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
1180  control->pause_filter_count = 3000;
1181  set_intercept(svm, INTERCEPT_PAUSE);
1182  }
1183 
1184  mark_all_dirty(svm->vmcb);
1185 
1186  enable_gif(svm);
1187 }
1188 
1189 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
1190 {
1191  struct vcpu_svm *svm = to_svm(vcpu);
1192 
1193  init_vmcb(svm);
1194 
1195  if (!kvm_vcpu_is_bsp(vcpu)) {
1196  kvm_rip_write(vcpu, 0);
1197  svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
1198  svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
1199  }
1200  vcpu->arch.regs_avail = ~0;
1201  vcpu->arch.regs_dirty = ~0;
1202 
1203  return 0;
1204 }
1205 
1206 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
1207 {
1208  struct vcpu_svm *svm;
1209  struct page *page;
1210  struct page *msrpm_pages;
1211  struct page *hsave_page;
1212  struct page *nested_msrpm_pages;
1213  int err;
1214 
1215  svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1216  if (!svm) {
1217  err = -ENOMEM;
1218  goto out;
1219  }
1220 
1221  svm->tsc_ratio = TSC_RATIO_DEFAULT;
1222 
1223  err = kvm_vcpu_init(&svm->vcpu, kvm, id);
1224  if (err)
1225  goto free_svm;
1226 
1227  err = -ENOMEM;
1228  page = alloc_page(GFP_KERNEL);
1229  if (!page)
1230  goto uninit;
1231 
1232  msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1233  if (!msrpm_pages)
1234  goto free_page1;
1235 
1236  nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1237  if (!nested_msrpm_pages)
1238  goto free_page2;
1239 
1240  hsave_page = alloc_page(GFP_KERNEL);
1241  if (!hsave_page)
1242  goto free_page3;
1243 
1244  svm->nested.hsave = page_address(hsave_page);
1245 
1246  svm->msrpm = page_address(msrpm_pages);
1247  svm_vcpu_init_msrpm(svm->msrpm);
1248 
1249  svm->nested.msrpm = page_address(nested_msrpm_pages);
1250  svm_vcpu_init_msrpm(svm->nested.msrpm);
1251 
1252  svm->vmcb = page_address(page);
1253  clear_page(svm->vmcb);
1254  svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
1255  svm->asid_generation = 0;
1256  init_vmcb(svm);
1257  kvm_write_tsc(&svm->vcpu, 0);
1258 
1259  err = fx_init(&svm->vcpu);
1260  if (err)
1261  goto free_page4;
1262 
1263  svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
1264  if (kvm_vcpu_is_bsp(&svm->vcpu))
1265  svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
1266 
1267  svm_init_osvw(&svm->vcpu);
1268 
1269  return &svm->vcpu;
1270 
1271 free_page4:
1272  __free_page(hsave_page);
1273 free_page3:
1274  __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
1275 free_page2:
1276  __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
1277 free_page1:
1278  __free_page(page);
1279 uninit:
1280  kvm_vcpu_uninit(&svm->vcpu);
1281 free_svm:
1282  kmem_cache_free(kvm_vcpu_cache, svm);
1283 out:
1284  return ERR_PTR(err);
1285 }
1286 
1287 static void svm_free_vcpu(struct kvm_vcpu *vcpu)
1288 {
1289  struct vcpu_svm *svm = to_svm(vcpu);
1290 
1291  __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
1292  __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
1293  __free_page(virt_to_page(svm->nested.hsave));
1294  __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
1295  kvm_vcpu_uninit(vcpu);
1296  kmem_cache_free(kvm_vcpu_cache, svm);
1297 }
1298 
1299 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1300 {
1301  struct vcpu_svm *svm = to_svm(vcpu);
1302  int i;
1303 
1304  if (unlikely(cpu != vcpu->cpu)) {
1305  svm->asid_generation = 0;
1306  mark_all_dirty(svm->vmcb);
1307  }
1308 
1309 #ifdef CONFIG_X86_64
1310  rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
1311 #endif
1312  savesegment(fs, svm->host.fs);
1313  savesegment(gs, svm->host.gs);
1314  svm->host.ldt = kvm_read_ldt();
1315 
1316  for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
1317  rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1318 
1319  if (static_cpu_has(X86_FEATURE_TSCRATEMSR) &&
1320  svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) {
1321  __get_cpu_var(current_tsc_ratio) = svm->tsc_ratio;
1322  wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio);
1323  }
1324 }
1325 
1326 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1327 {
1328  struct vcpu_svm *svm = to_svm(vcpu);
1329  int i;
1330 
1331  ++vcpu->stat.host_state_reload;
1332  kvm_load_ldt(svm->host.ldt);
1333 #ifdef CONFIG_X86_64
1334  loadsegment(fs, svm->host.fs);
1335  wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
1336  load_gs_index(svm->host.gs);
1337 #else
1338 #ifdef CONFIG_X86_32_LAZY_GS
1339  loadsegment(gs, svm->host.gs);
1340 #endif
1341 #endif
1342  for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
1343  wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1344 }
1345 
1346 static void svm_update_cpl(struct kvm_vcpu *vcpu)
1347 {
1348  struct vcpu_svm *svm = to_svm(vcpu);
1349  int cpl;
1350 
1351  if (!is_protmode(vcpu))
1352  cpl = 0;
1353  else if (svm->vmcb->save.rflags & X86_EFLAGS_VM)
1354  cpl = 3;
1355  else
1356  cpl = svm->vmcb->save.cs.selector & 0x3;
1357 
1358  svm->vmcb->save.cpl = cpl;
1359 }
1360 
1361 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1362 {
1363  return to_svm(vcpu)->vmcb->save.rflags;
1364 }
1365 
1366 static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1367 {
1368  unsigned long old_rflags = to_svm(vcpu)->vmcb->save.rflags;
1369 
1370  to_svm(vcpu)->vmcb->save.rflags = rflags;
1371  if ((old_rflags ^ rflags) & X86_EFLAGS_VM)
1372  svm_update_cpl(vcpu);
1373 }
1374 
1375 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
1376 {
1377  switch (reg) {
1378  case VCPU_EXREG_PDPTR:
1379  BUG_ON(!npt_enabled);
1380  load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
1381  break;
1382  default:
1383  BUG();
1384  }
1385 }
1386 
1387 static void svm_set_vintr(struct vcpu_svm *svm)
1388 {
1389  set_intercept(svm, INTERCEPT_VINTR);
1390 }
1391 
1392 static void svm_clear_vintr(struct vcpu_svm *svm)
1393 {
1394  clr_intercept(svm, INTERCEPT_VINTR);
1395 }
1396 
1397 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
1398 {
1399  struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1400 
1401  switch (seg) {
1402  case VCPU_SREG_CS: return &save->cs;
1403  case VCPU_SREG_DS: return &save->ds;
1404  case VCPU_SREG_ES: return &save->es;
1405  case VCPU_SREG_FS: return &save->fs;
1406  case VCPU_SREG_GS: return &save->gs;
1407  case VCPU_SREG_SS: return &save->ss;
1408  case VCPU_SREG_TR: return &save->tr;
1409  case VCPU_SREG_LDTR: return &save->ldtr;
1410  }
1411  BUG();
1412  return NULL;
1413 }
1414 
1415 static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1416 {
1417  struct vmcb_seg *s = svm_seg(vcpu, seg);
1418 
1419  return s->base;
1420 }
1421 
1422 static void svm_get_segment(struct kvm_vcpu *vcpu,
1423  struct kvm_segment *var, int seg)
1424 {
1425  struct vmcb_seg *s = svm_seg(vcpu, seg);
1426 
1427  var->base = s->base;
1428  var->limit = s->limit;
1429  var->selector = s->selector;
1430  var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
1431  var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
1432  var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
1433  var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
1434  var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
1435  var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
1436  var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
1437  var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
1438 
1439  /*
1440  * AMD's VMCB does not have an explicit unusable field, so emulate it
1441  * for cross vendor migration purposes by "not present"
1442  */
1443  var->unusable = !var->present || (var->type == 0);
1444 
1445  switch (seg) {
1446  case VCPU_SREG_CS:
1447  /*
1448  * SVM always stores 0 for the 'G' bit in the CS selector in
1449  * the VMCB on a VMEXIT. This hurts cross-vendor migration:
1450  * Intel's VMENTRY has a check on the 'G' bit.
1451  */
1452  var->g = s->limit > 0xfffff;
1453  break;
1454  case VCPU_SREG_TR:
1455  /*
1456  * Work around a bug where the busy flag in the tr selector
1457  * isn't exposed
1458  */
1459  var->type |= 0x2;
1460  break;
1461  case VCPU_SREG_DS:
1462  case VCPU_SREG_ES:
1463  case VCPU_SREG_FS:
1464  case VCPU_SREG_GS:
1465  /*
1466  * The accessed bit must always be set in the segment
1467  * descriptor cache, although it can be cleared in the
1468  * descriptor, the cached bit always remains at 1. Since
1469  * Intel has a check on this, set it here to support
1470  * cross-vendor migration.
1471  */
1472  if (!var->unusable)
1473  var->type |= 0x1;
1474  break;
1475  case VCPU_SREG_SS:
1476  /*
1477  * On AMD CPUs sometimes the DB bit in the segment
1478  * descriptor is left as 1, although the whole segment has
1479  * been made unusable. Clear it here to pass an Intel VMX
1480  * entry check when cross vendor migrating.
1481  */
1482  if (var->unusable)
1483  var->db = 0;
1484  break;
1485  }
1486 }
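/*
 * The attrib decoding above mirrors the VMCB segment attribute layout:
 * the 4-bit type sits in bits 3:0, followed by S, DPL, P, AVL, L, DB and G
 * at the SVM_SELECTOR_*_SHIFT positions, which is essentially the
 * descriptor's attribute bits with the limit[19:16] nibble removed. The
 * quirks handled in the switch exist purely to keep cross-vendor migration
 * to Intel VMX hosts working.
 */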
1487 
1488 static int svm_get_cpl(struct kvm_vcpu *vcpu)
1489 {
1490  struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1491 
1492  return save->cpl;
1493 }
1494 
1495 static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1496 {
1497  struct vcpu_svm *svm = to_svm(vcpu);
1498 
1499  dt->size = svm->vmcb->save.idtr.limit;
1500  dt->address = svm->vmcb->save.idtr.base;
1501 }
1502 
1503 static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1504 {
1505  struct vcpu_svm *svm = to_svm(vcpu);
1506 
1507  svm->vmcb->save.idtr.limit = dt->size;
1508  svm->vmcb->save.idtr.base = dt->address;
1509  mark_dirty(svm->vmcb, VMCB_DT);
1510 }
1511 
1512 static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1513 {
1514  struct vcpu_svm *svm = to_svm(vcpu);
1515 
1516  dt->size = svm->vmcb->save.gdtr.limit;
1517  dt->address = svm->vmcb->save.gdtr.base;
1518 }
1519 
1520 static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1521 {
1522  struct vcpu_svm *svm = to_svm(vcpu);
1523 
1524  svm->vmcb->save.gdtr.limit = dt->size;
1525  svm->vmcb->save.gdtr.base = dt->address;
1526  mark_dirty(svm->vmcb, VMCB_DT);
1527 }
1528 
1529 static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
1530 {
1531 }
1532 
1533 static void svm_decache_cr3(struct kvm_vcpu *vcpu)
1534 {
1535 }
1536 
1537 static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
1538 {
1539 }
1540 
1541 static void update_cr0_intercept(struct vcpu_svm *svm)
1542 {
1543  ulong gcr0 = svm->vcpu.arch.cr0;
1544  u64 *hcr0 = &svm->vmcb->save.cr0;
1545 
1546  if (!svm->vcpu.fpu_active)
1547  *hcr0 |= SVM_CR0_SELECTIVE_MASK;
1548  else
1549  *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
1550  | (gcr0 & SVM_CR0_SELECTIVE_MASK);
1551 
1552  mark_dirty(svm->vmcb, VMCB_CR);
1553 
1554  if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
1555  clr_cr_intercept(svm, INTERCEPT_CR0_READ);
1556  clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1557  } else {
1558  set_cr_intercept(svm, INTERCEPT_CR0_READ);
1559  set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1560  }
1561 }
1562 
1563 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1564 {
1565  struct vcpu_svm *svm = to_svm(vcpu);
1566 
1567 #ifdef CONFIG_X86_64
1568  if (vcpu->arch.efer & EFER_LME) {
1569  if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
1570  vcpu->arch.efer |= EFER_LMA;
1571  svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
1572  }
1573 
1574  if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
1575  vcpu->arch.efer &= ~EFER_LMA;
1576  svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
1577  }
1578  }
1579 #endif
1580  vcpu->arch.cr0 = cr0;
1581 
1582  if (!npt_enabled)
1583  cr0 |= X86_CR0_PG | X86_CR0_WP;
1584 
1585  if (!vcpu->fpu_active)
1586  cr0 |= X86_CR0_TS;
1587  /*
1588  * re-enable caching here because the QEMU bios
1589  * does not do it - this results in some delay at
1590  * reboot
1591  */
1592  cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
1593  svm->vmcb->save.cr0 = cr0;
1594  mark_dirty(svm->vmcb, VMCB_CR);
1595  update_cr0_intercept(svm);
1596 }
1597 
1598 static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1599 {
1600  unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
1601  unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
1602 
1603  if (cr4 & X86_CR4_VMXE)
1604  return 1;
1605 
1606  if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
1607  svm_flush_tlb(vcpu);
1608 
1609  vcpu->arch.cr4 = cr4;
1610  if (!npt_enabled)
1611  cr4 |= X86_CR4_PAE;
1612  cr4 |= host_cr4_mce;
1613  to_svm(vcpu)->vmcb->save.cr4 = cr4;
1614  mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
1615  return 0;
1616 }
1617 
1618 static void svm_set_segment(struct kvm_vcpu *vcpu,
1619  struct kvm_segment *var, int seg)
1620 {
1621  struct vcpu_svm *svm = to_svm(vcpu);
1622  struct vmcb_seg *s = svm_seg(vcpu, seg);
1623 
1624  s->base = var->base;
1625  s->limit = var->limit;
1626  s->selector = var->selector;
1627  if (var->unusable)
1628  s->attrib = 0;
1629  else {
1630  s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
1631  s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
1632  s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
1633  s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
1634  s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
1635  s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
1636  s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
1637  s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1638  }
1639  if (seg == VCPU_SREG_CS)
1640  svm_update_cpl(vcpu);
1641 
1642  mark_dirty(svm->vmcb, VMCB_SEG);
1643 }
1644 
1645 static void update_db_bp_intercept(struct kvm_vcpu *vcpu)
1646 {
1647  struct vcpu_svm *svm = to_svm(vcpu);
1648 
1649  clr_exception_intercept(svm, DB_VECTOR);
1650  clr_exception_intercept(svm, BP_VECTOR);
1651 
1652  if (svm->nmi_singlestep)
1653  set_exception_intercept(svm, DB_VECTOR);
1654 
1655  if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
1656  if (vcpu->guest_debug &
1657  (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
1658  set_exception_intercept(svm, DB_VECTOR);
1659  if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
1660  set_exception_intercept(svm, BP_VECTOR);
1661  } else
1662  vcpu->guest_debug = 0;
1663 }
1664 
1665 static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1666 {
1667  if (sd->next_asid > sd->max_asid) {
1668  ++sd->asid_generation;
1669  sd->next_asid = 1;
1670  svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1671  }
1672 
1673  svm->asid_generation = sd->asid_generation;
1674  svm->vmcb->control.asid = sd->next_asid++;
1675 
1676  mark_dirty(svm->vmcb, VMCB_ASID);
1677 }
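/*
 * ASID handling in brief: each physical CPU hands out ASIDs from 1 up to
 * max_asid and bumps its asid_generation when the pool wraps, requesting a
 * flush of all ASIDs in that case. A vCPU whose cached svm->asid_generation
 * no longer matches the CPU it is about to run on (svm_vcpu_load() resets
 * it to 0 on migration) is simply handed a fresh ASID here instead of
 * flushing the TLB.
 */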
1678 
1679 static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1680 {
1681  struct vcpu_svm *svm = to_svm(vcpu);
1682 
1683  svm->vmcb->save.dr7 = value;
1684  mark_dirty(svm->vmcb, VMCB_DR);
1685 }
1686 
1687 static int pf_interception(struct vcpu_svm *svm)
1688 {
1689  u64 fault_address = svm->vmcb->control.exit_info_2;
1690  u32 error_code;
1691  int r = 1;
1692 
1693  switch (svm->apf_reason) {
1694  default:
1695  error_code = svm->vmcb->control.exit_info_1;
1696 
1697  trace_kvm_page_fault(fault_address, error_code);
1698  if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
1699  kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
1700  r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
1701  svm->vmcb->control.insn_bytes,
1702  svm->vmcb->control.insn_len);
1703  break;
1704  case KVM_PV_REASON_PAGE_NOT_PRESENT:
1705  svm->apf_reason = 0;
1706  local_irq_disable();
1707  kvm_async_pf_task_wait(fault_address);
1708  local_irq_enable();
1709  break;
1710  case KVM_PV_REASON_PAGE_READY:
1711  svm->apf_reason = 0;
1712  local_irq_disable();
1713  kvm_async_pf_task_wake(fault_address);
1714  local_irq_enable();
1715  break;
1716  }
1717  return r;
1718 }
1719 
1720 static int db_interception(struct vcpu_svm *svm)
1721 {
1722  struct kvm_run *kvm_run = svm->vcpu.run;
1723 
1724  if (!(svm->vcpu.guest_debug &
1725  (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
1726  !svm->nmi_singlestep) {
1727  kvm_queue_exception(&svm->vcpu, DB_VECTOR);
1728  return 1;
1729  }
1730 
1731  if (svm->nmi_singlestep) {
1732  svm->nmi_singlestep = false;
1733  if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
1734  svm->vmcb->save.rflags &=
1735  ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1736  update_db_bp_intercept(&svm->vcpu);
1737  }
1738 
1739  if (svm->vcpu.guest_debug &
1740  (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
1741  kvm_run->exit_reason = KVM_EXIT_DEBUG;
1742  kvm_run->debug.arch.pc =
1743  svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1744  kvm_run->debug.arch.exception = DB_VECTOR;
1745  return 0;
1746  }
1747 
1748  return 1;
1749 }
1750 
1751 static int bp_interception(struct vcpu_svm *svm)
1752 {
1753  struct kvm_run *kvm_run = svm->vcpu.run;
1754 
1755  kvm_run->exit_reason = KVM_EXIT_DEBUG;
1756  kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1757  kvm_run->debug.arch.exception = BP_VECTOR;
1758  return 0;
1759 }
1760 
1761 static int ud_interception(struct vcpu_svm *svm)
1762 {
1763  int er;
1764 
1765  er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
1766  if (er != EMULATE_DONE)
1767  kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1768  return 1;
1769 }
1770 
1771 static void svm_fpu_activate(struct kvm_vcpu *vcpu)
1772 {
1773  struct vcpu_svm *svm = to_svm(vcpu);
1774 
1775  clr_exception_intercept(svm, NM_VECTOR);
1776 
1777  svm->vcpu.fpu_active = 1;
1778  update_cr0_intercept(svm);
1779 }
1780 
1781 static int nm_interception(struct vcpu_svm *svm)
1782 {
1783  svm_fpu_activate(&svm->vcpu);
1784  return 1;
1785 }
1786 
1787 static bool is_erratum_383(void)
1788 {
1789  int err, i;
1790  u64 value;
1791 
1792  if (!erratum_383_found)
1793  return false;
1794 
1795  value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
1796  if (err)
1797  return false;
1798 
1799  /* Bit 62 may or may not be set for this mce */
1800  value &= ~(1ULL << 62);
1801 
1802  if (value != 0xb600000000010015ULL)
1803  return false;
1804 
1805  /* Clear MCi_STATUS registers */
1806  for (i = 0; i < 6; ++i)
1807  native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
1808 
1809  value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
1810  if (!err) {
1811  u32 low, high;
1812 
1813  value &= ~(1ULL << 2);
1814  low = lower_32_bits(value);
1815  high = upper_32_bits(value);
1816 
1817  native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
1818  }
1819 
1820  /* Flush tlb to evict multi-match entries */
1821  __flush_tlb_all();
1822 
1823  return true;
1824 }
1825 
1826 static void svm_handle_mce(struct vcpu_svm *svm)
1827 {
1828  if (is_erratum_383()) {
1829  /*
1830  * Erratum 383 triggered. Guest state is corrupt so kill the
1831  * guest.
1832  */
1833  pr_err("KVM: Guest triggered AMD Erratum 383\n");
1834 
1835  kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
1836 
1837  return;
1838  }
1839 
1840  /*
1841  * On an #MC intercept the MCE handler is not called automatically in
1842  * the host. So do it by hand here.
1843  */
1844  asm volatile (
1845  "int $0x12\n");
1846  /* not sure if we ever come back to this point */
1847 
1848  return;
1849 }
1850 
1851 static int mc_interception(struct vcpu_svm *svm)
1852 {
1853  return 1;
1854 }
1855 
1856 static int shutdown_interception(struct vcpu_svm *svm)
1857 {
1858  struct kvm_run *kvm_run = svm->vcpu.run;
1859 
1860  /*
1861  * VMCB is undefined after a SHUTDOWN intercept
1862  * so reinitialize it.
1863  */
1864  clear_page(svm->vmcb);
1865  init_vmcb(svm);
1866 
1867  kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
1868  return 0;
1869 }
1870 
1871 static int io_interception(struct vcpu_svm *svm)
1872 {
1873  struct kvm_vcpu *vcpu = &svm->vcpu;
1874  u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
1875  int size, in, string;
1876  unsigned port;
1877 
1878  ++svm->vcpu.stat.io_exits;
1879  string = (io_info & SVM_IOIO_STR_MASK) != 0;
1880  in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
1881  if (string || in)
1882  return emulate_instruction(vcpu, 0) == EMULATE_DONE;
1883 
1884  port = io_info >> 16;
1885  size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
1886  svm->next_rip = svm->vmcb->control.exit_info_2;
1887  skip_emulated_instruction(&svm->vcpu);
1888 
1889  return kvm_fast_pio_out(vcpu, size, port);
1890 }
1891 
1892 static int nmi_interception(struct vcpu_svm *svm)
1893 {
1894  return 1;
1895 }
1896 
1897 static int intr_interception(struct vcpu_svm *svm)
1898 {
1899  ++svm->vcpu.stat.irq_exits;
1900  return 1;
1901 }
1902 
1903 static int nop_on_interception(struct vcpu_svm *svm)
1904 {
1905  return 1;
1906 }
1907 
1908 static int halt_interception(struct vcpu_svm *svm)
1909 {
1910  svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
1911  skip_emulated_instruction(&svm->vcpu);
1912  return kvm_emulate_halt(&svm->vcpu);
1913 }
1914 
1915 static int vmmcall_interception(struct vcpu_svm *svm)
1916 {
1917  svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1918  skip_emulated_instruction(&svm->vcpu);
1919  kvm_emulate_hypercall(&svm->vcpu);
1920  return 1;
1921 }
1922 
1923 static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
1924 {
1925  struct vcpu_svm *svm = to_svm(vcpu);
1926 
1927  return svm->nested.nested_cr3;
1928 }
1929 
1930 static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
1931 {
1932  struct vcpu_svm *svm = to_svm(vcpu);
1933  u64 cr3 = svm->nested.nested_cr3;
1934  u64 pdpte;
1935  int ret;
1936 
1937  ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte,
1938  offset_in_page(cr3) + index * 8, 8);
1939  if (ret)
1940  return 0;
1941  return pdpte;
1942 }
1943 
1944 static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
1945  unsigned long root)
1946 {
1947  struct vcpu_svm *svm = to_svm(vcpu);
1948 
1949  svm->vmcb->control.nested_cr3 = root;
1950  mark_dirty(svm->vmcb, VMCB_NPT);
1951  svm_flush_tlb(vcpu);
1952 }
1953 
1954 static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
1955  struct x86_exception *fault)
1956 {
1957  struct vcpu_svm *svm = to_svm(vcpu);
1958 
1959  svm->vmcb->control.exit_code = SVM_EXIT_NPF;
1960  svm->vmcb->control.exit_code_hi = 0;
1961  svm->vmcb->control.exit_info_1 = fault->error_code;
1962  svm->vmcb->control.exit_info_2 = fault->address;
1963 
1964  nested_svm_vmexit(svm);
1965 }
1966 
1967 static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
1968 {
1969  int r;
1970 
1971  r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
1972 
1973  vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
1974  vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
1975  vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
1976  vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
1977  vcpu->arch.mmu.shadow_root_level = get_npt_level();
1978  vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
1979 
1980  return r;
1981 }
1982 
1983 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
1984 {
1985  vcpu->arch.walk_mmu = &vcpu->arch.mmu;
1986 }
1987 
1988 static int nested_svm_check_permissions(struct vcpu_svm *svm)
1989 {
1990  if (!(svm->vcpu.arch.efer & EFER_SVME)
1991  || !is_paging(&svm->vcpu)) {
1992  kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1993  return 1;
1994  }
1995 
1996  if (svm->vmcb->save.cpl) {
1997  kvm_inject_gp(&svm->vcpu, 0);
1998  return 1;
1999  }
2000 
2001  return 0;
2002 }
2003 
2004 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
2005  bool has_error_code, u32 error_code)
2006 {
2007  int vmexit;
2008 
2009  if (!is_guest_mode(&svm->vcpu))
2010  return 0;
2011 
2012  svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
2013  svm->vmcb->control.exit_code_hi = 0;
2014  svm->vmcb->control.exit_info_1 = error_code;
2015  svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
2016 
2017  vmexit = nested_svm_intercept(svm);
2018  if (vmexit == NESTED_EXIT_DONE)
2019  svm->nested.exit_required = true;
2020 
2021  return vmexit;
2022 }
2023 
2024 /* This function returns true if it is safe to enable the irq window */
2025 static inline bool nested_svm_intr(struct vcpu_svm *svm)
2026 {
2027  if (!is_guest_mode(&svm->vcpu))
2028  return true;
2029 
2030  if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
2031  return true;
2032 
2033  if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
2034  return false;
2035 
2036  /*
2037  * if vmexit was already requested (by intercepted exception
2038  * for instance) do not overwrite it with "external interrupt"
2039  * vmexit.
2040  */
2041  if (svm->nested.exit_required)
2042  return false;
2043 
2044  svm->vmcb->control.exit_code = SVM_EXIT_INTR;
2045  svm->vmcb->control.exit_info_1 = 0;
2046  svm->vmcb->control.exit_info_2 = 0;
2047 
2048  if (svm->nested.intercept & 1ULL) {
2049  /*
2050  * The #vmexit can't be emulated here directly because this
2051  * code path runs with irqs and preemption disabled. A
2052  * #vmexit emulation might sleep. Only signal request for
2053  * the #vmexit here.
2054  */
2055  svm->nested.exit_required = true;
2056  trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
2057  return false;
2058  }
2059 
2060  return true;
2061 }
2062 
2063 /* This function returns true if it is safe to enable the nmi window */
2064 static inline bool nested_svm_nmi(struct vcpu_svm *svm)
2065 {
2066  if (!is_guest_mode(&svm->vcpu))
2067  return true;
2068 
2069  if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
2070  return true;
2071 
2072  svm->vmcb->control.exit_code = SVM_EXIT_NMI;
2073  svm->nested.exit_required = true;
2074 
2075  return false;
2076 }
2077 
2078 static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
2079 {
2080  struct page *page;
2081 
2082  might_sleep();
2083 
2084  page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
2085  if (is_error_page(page))
2086  goto error;
2087 
2088  *_page = page;
2089 
2090  return kmap(page);
2091 
2092 error:
2093  kvm_inject_gp(&svm->vcpu, 0);
2094 
2095  return NULL;
2096 }
2097 
2098 static void nested_svm_unmap(struct page *page)
2099 {
2100  kunmap(page);
2101  kvm_release_page_dirty(page);
2102 }
2103 
2104 static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
2105 {
2106  unsigned port;
2107  u8 val, bit;
2108  u64 gpa;
2109 
2110  if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
2111  return NESTED_EXIT_HOST;
2112 
2113  port = svm->vmcb->control.exit_info_1 >> 16;
2114  gpa = svm->nested.vmcb_iopm + (port / 8);
2115  bit = port % 8;
2116  val = 0;
2117 
2118  if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1))
2119  val &= (1 << bit);
2120 
2121  return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
2122 }
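/*
 * Worked example for the IOPM lookup above: an OUT to port 0x3f8 by the
 * nested guest is checked against byte 0x3f8 / 8 = 127 of the L1 I/O
 * permission map at vmcb_iopm, bit 0x3f8 % 8 = 0; if that bit is set the
 * exit is reflected to the L1 hypervisor, otherwise KVM handles the I/O
 * itself.
 */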
2123 
2124 static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
2125 {
2126  u32 offset, msr, value;
2127  int write, mask;
2128 
2129  if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
2130  return NESTED_EXIT_HOST;
2131 
2132  msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2133  offset = svm_msrpm_offset(msr);
2134  write = svm->vmcb->control.exit_info_1 & 1;
2135  mask = 1 << ((2 * (msr & 0xf)) + write);
2136 
2137  if (offset == MSR_INVALID)
2138  return NESTED_EXIT_DONE;
2139 
2140  /* Offset is in 32-bit units but we need it in byte (8-bit) units */
2141  offset *= 4;
2142 
2143  if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4))
2144  return NESTED_EXIT_DONE;
2145 
2146  return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
2147 }
2148 
2149 static int nested_svm_exit_special(struct vcpu_svm *svm)
2150 {
2151  u32 exit_code = svm->vmcb->control.exit_code;
2152 
2153  switch (exit_code) {
2154  case SVM_EXIT_INTR:
2155  case SVM_EXIT_NMI:
2156  case SVM_EXIT_EXCP_BASE + MC_VECTOR:
2157  return NESTED_EXIT_HOST;
2158  case SVM_EXIT_NPF:
2159  /* For now we are always handling NPFs when using them */
2160  if (npt_enabled)
2161  return NESTED_EXIT_HOST;
2162  break;
2163  case SVM_EXIT_EXCP_BASE + PF_VECTOR:
2164  /* When we're shadowing, trap PFs, but not async PF */
2165  if (!npt_enabled && svm->apf_reason == 0)
2166  return NESTED_EXIT_HOST;
2167  break;
2168  case SVM_EXIT_EXCP_BASE + NM_VECTOR:
2169  nm_interception(svm);
2170  break;
2171  default:
2172  break;
2173  }
2174 
2175  return NESTED_EXIT_CONTINUE;
2176 }
2177 
2178 /*
2179  * If this function returns true, this #vmexit was already handled
2180  */
2181 static int nested_svm_intercept(struct vcpu_svm *svm)
2182 {
2183  u32 exit_code = svm->vmcb->control.exit_code;
2184  int vmexit = NESTED_EXIT_HOST;
2185 
2186  switch (exit_code) {
2187  case SVM_EXIT_MSR:
2188  vmexit = nested_svm_exit_handled_msr(svm);
2189  break;
2190  case SVM_EXIT_IOIO:
2191  vmexit = nested_svm_intercept_ioio(svm);
2192  break;
2193  case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
2194  u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
2195  if (svm->nested.intercept_cr & bit)
2196  vmexit = NESTED_EXIT_DONE;
2197  break;
2198  }
2199  case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
2200  u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
2201  if (svm->nested.intercept_dr & bit)
2202  vmexit = NESTED_EXIT_DONE;
2203  break;
2204  }
2205  case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
2206  u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
2207  if (svm->nested.intercept_exceptions & excp_bits)
2208  vmexit = NESTED_EXIT_DONE;
2209  /* an async page fault always causes a vmexit */
2210  else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
2211  svm->apf_reason != 0)
2212  vmexit = NESTED_EXIT_DONE;
2213  break;
2214  }
2215  case SVM_EXIT_ERR: {
2216  vmexit = NESTED_EXIT_DONE;
2217  break;
2218  }
2219  default: {
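 /*
  * All remaining exit codes map linearly onto the 64-bit intercept
  * vector, starting with SVM_EXIT_INTR at bit 0.
  */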
2220  u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
2221  if (svm->nested.intercept & exit_bits)
2222  vmexit = NESTED_EXIT_DONE;
2223  }
2224  }
2225 
2226  return vmexit;
2227 }
2228 
2229 static int nested_svm_exit_handled(struct vcpu_svm *svm)
2230 {
2231  int vmexit;
2232 
2233  vmexit = nested_svm_intercept(svm);
2234 
2235  if (vmexit == NESTED_EXIT_DONE)
2236  nested_svm_vmexit(svm);
2237 
2238  return vmexit;
2239 }
2240 
2241 static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
2242 {
2243  struct vmcb_control_area *dst = &dst_vmcb->control;
2244  struct vmcb_control_area *from = &from_vmcb->control;
2245 
2246  dst->intercept_cr = from->intercept_cr;
2247  dst->intercept_dr = from->intercept_dr;
2248  dst->intercept_exceptions = from->intercept_exceptions;
2249  dst->intercept = from->intercept;
2250  dst->iopm_base_pa = from->iopm_base_pa;
2251  dst->msrpm_base_pa = from->msrpm_base_pa;
2252  dst->tsc_offset = from->tsc_offset;
2253  dst->asid = from->asid;
2254  dst->tlb_ctl = from->tlb_ctl;
2255  dst->int_ctl = from->int_ctl;
2256  dst->int_vector = from->int_vector;
2257  dst->int_state = from->int_state;
2258  dst->exit_code = from->exit_code;
2259  dst->exit_code_hi = from->exit_code_hi;
2260  dst->exit_info_1 = from->exit_info_1;
2261  dst->exit_info_2 = from->exit_info_2;
2262  dst->exit_int_info = from->exit_int_info;
2263  dst->exit_int_info_err = from->exit_int_info_err;
2264  dst->nested_ctl = from->nested_ctl;
2265  dst->event_inj = from->event_inj;
2266  dst->event_inj_err = from->event_inj_err;
2267  dst->nested_cr3 = from->nested_cr3;
2268  dst->lbr_ctl = from->lbr_ctl;
2269 }
2270 
2271 static int nested_svm_vmexit(struct vcpu_svm *svm)
2272 {
2273  struct vmcb *nested_vmcb;
2274  struct vmcb *hsave = svm->nested.hsave;
2275  struct vmcb *vmcb = svm->vmcb;
2276  struct page *page;
2277 
2278  trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
2279  vmcb->control.exit_info_1,
2280  vmcb->control.exit_info_2,
2281  vmcb->control.exit_int_info,
2282  vmcb->control.exit_int_info_err,
2283  KVM_ISA_SVM);
2284 
2285  nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
2286  if (!nested_vmcb)
2287  return 1;
2288 
2289  /* Exit Guest-Mode */
2290  leave_guest_mode(&svm->vcpu);
2291  svm->nested.vmcb = 0;
2292 
2293  /* Give the current vmcb to the guest */
2294  disable_gif(svm);
2295 
2296  nested_vmcb->save.es = vmcb->save.es;
2297  nested_vmcb->save.cs = vmcb->save.cs;
2298  nested_vmcb->save.ss = vmcb->save.ss;
2299  nested_vmcb->save.ds = vmcb->save.ds;
2300  nested_vmcb->save.gdtr = vmcb->save.gdtr;
2301  nested_vmcb->save.idtr = vmcb->save.idtr;
2302  nested_vmcb->save.efer = svm->vcpu.arch.efer;
2303  nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
2304  nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
2305  nested_vmcb->save.cr2 = vmcb->save.cr2;
2306  nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
2307  nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
2308  nested_vmcb->save.rip = vmcb->save.rip;
2309  nested_vmcb->save.rsp = vmcb->save.rsp;
2310  nested_vmcb->save.rax = vmcb->save.rax;
2311  nested_vmcb->save.dr7 = vmcb->save.dr7;
2312  nested_vmcb->save.dr6 = vmcb->save.dr6;
2313  nested_vmcb->save.cpl = vmcb->save.cpl;
2314 
2315  nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
2316  nested_vmcb->control.int_vector = vmcb->control.int_vector;
2317  nested_vmcb->control.int_state = vmcb->control.int_state;
2318  nested_vmcb->control.exit_code = vmcb->control.exit_code;
2319  nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
2320  nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
2321  nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
2322  nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
2323  nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
2324  nested_vmcb->control.next_rip = vmcb->control.next_rip;
2325 
2326  /*
2327  * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
2328  * to make sure that we do not lose injected events. So check event_inj
2329  * here and copy it to exit_int_info if it is valid.
2330  * Exit_int_info and event_inj can't both be valid because the case
2331  * below only happens on a VMRUN instruction intercept which has
2332  * no valid exit_int_info set.
2333  */
2334  if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
2335  struct vmcb_control_area *nc = &nested_vmcb->control;
2336 
2337  nc->exit_int_info = vmcb->control.event_inj;
2338  nc->exit_int_info_err = vmcb->control.event_inj_err;
2339  }
2340 
2341  nested_vmcb->control.tlb_ctl = 0;
2342  nested_vmcb->control.event_inj = 0;
2343  nested_vmcb->control.event_inj_err = 0;
2344 
2345  /* We always set V_INTR_MASKING and remember the old value in hflags */
2346  if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
2347  nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
2348 
2349  /* Restore the original control entries */
2350  copy_vmcb_control_area(vmcb, hsave);
2351 
2352  kvm_clear_exception_queue(&svm->vcpu);
2353  kvm_clear_interrupt_queue(&svm->vcpu);
2354 
2355  svm->nested.nested_cr3 = 0;
2356 
2357  /* Restore selected save entries */
2358  svm->vmcb->save.es = hsave->save.es;
2359  svm->vmcb->save.cs = hsave->save.cs;
2360  svm->vmcb->save.ss = hsave->save.ss;
2361  svm->vmcb->save.ds = hsave->save.ds;
2362  svm->vmcb->save.gdtr = hsave->save.gdtr;
2363  svm->vmcb->save.idtr = hsave->save.idtr;
2364  kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
2365  svm_set_efer(&svm->vcpu, hsave->save.efer);
2366  svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
2367  svm_set_cr4(&svm->vcpu, hsave->save.cr4);
2368  if (npt_enabled) {
2369  svm->vmcb->save.cr3 = hsave->save.cr3;
2370  svm->vcpu.arch.cr3 = hsave->save.cr3;
2371  } else {
2372  (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
2373  }
2374  kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
2375  kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
2376  kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
2377  svm->vmcb->save.dr7 = 0;
2378  svm->vmcb->save.cpl = 0;
2379  svm->vmcb->control.exit_int_info = 0;
2380 
2381  mark_all_dirty(svm->vmcb);
2382 
2383  nested_svm_unmap(page);
2384 
2385  nested_svm_uninit_mmu_context(&svm->vcpu);
2386  kvm_mmu_reset_context(&svm->vcpu);
2387  kvm_mmu_load(&svm->vcpu);
2388 
2389  return 0;
2390 }
2391 
2392 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
2393 {
2394  /*
2395  * This function merges the msr permission bitmaps of kvm and the
2396  * nested vmcb. It is optimized in that it only merges the parts where
2397  * the kvm msr permission bitmap may contain zero bits
2398  */
2399  int i;
2400 
2401  if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
2402  return true;
2403 
2404  for (i = 0; i < MSRPM_OFFSETS; i++) {
2405  u32 value, p;
2406  u64 offset;
2407 
2408  if (msrpm_offsets[i] == 0xffffffff)
2409  break;
2410 
2411  p = msrpm_offsets[i];
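 /* msrpm_offsets[] is in units of u32 words; convert to a byte offset. */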
2412  offset = svm->nested.vmcb_msrpm + (p * 4);
2413 
2414  if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4))
2415  return false;
2416 
2417  svm->nested.msrpm[p] = svm->msrpm[p] | value;
2418  }
2419 
2420  svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
2421 
2422  return true;
2423 }
2424 
2425 static bool nested_vmcb_checks(struct vmcb *vmcb)
2426 {
2427  if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
2428  return false;
2429 
2430  if (vmcb->control.asid == 0)
2431  return false;
2432 
2433  if (vmcb->control.nested_ctl && !npt_enabled)
2434  return false;
2435 
2436  return true;
2437 }
2438 
2439 static bool nested_svm_vmrun(struct vcpu_svm *svm)
2440 {
2441  struct vmcb *nested_vmcb;
2442  struct vmcb *hsave = svm->nested.hsave;
2443  struct vmcb *vmcb = svm->vmcb;
2444  struct page *page;
2445  u64 vmcb_gpa;
2446 
2447  vmcb_gpa = svm->vmcb->save.rax;
2448 
2449  nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2450  if (!nested_vmcb)
2451  return false;
2452 
2453  if (!nested_vmcb_checks(nested_vmcb)) {
2454  nested_vmcb->control.exit_code = SVM_EXIT_ERR;
2455  nested_vmcb->control.exit_code_hi = 0;
2456  nested_vmcb->control.exit_info_1 = 0;
2457  nested_vmcb->control.exit_info_2 = 0;
2458 
2459  nested_svm_unmap(page);
2460 
2461  return false;
2462  }
2463 
2464  trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
2465  nested_vmcb->save.rip,
2466  nested_vmcb->control.int_ctl,
2467  nested_vmcb->control.event_inj,
2468  nested_vmcb->control.nested_ctl);
2469 
2470  trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
2471  nested_vmcb->control.intercept_cr >> 16,
2472  nested_vmcb->control.intercept_exceptions,
2473  nested_vmcb->control.intercept);
2474 
2475  /* Clear internal status */
2476  kvm_clear_exception_queue(&svm->vcpu);
2477  kvm_clear_interrupt_queue(&svm->vcpu);
2478 
2479  /*
2480  * Save the old vmcb, so we don't need to pick what we save, but can
2481  * restore everything when a VMEXIT occurs
2482  */
2483  hsave->save.es = vmcb->save.es;
2484  hsave->save.cs = vmcb->save.cs;
2485  hsave->save.ss = vmcb->save.ss;
2486  hsave->save.ds = vmcb->save.ds;
2487  hsave->save.gdtr = vmcb->save.gdtr;
2488  hsave->save.idtr = vmcb->save.idtr;
2489  hsave->save.efer = svm->vcpu.arch.efer;
2490  hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
2491  hsave->save.cr4 = svm->vcpu.arch.cr4;
2492  hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
2493  hsave->save.rip = kvm_rip_read(&svm->vcpu);
2494  hsave->save.rsp = vmcb->save.rsp;
2495  hsave->save.rax = vmcb->save.rax;
2496  if (npt_enabled)
2497  hsave->save.cr3 = vmcb->save.cr3;
2498  else
2499  hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);
2500 
2501  copy_vmcb_control_area(hsave, vmcb);
2502 
2503  if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
2504  svm->vcpu.arch.hflags |= HF_HIF_MASK;
2505  else
2506  svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
2507 
2508  if (nested_vmcb->control.nested_ctl) {
2509  kvm_mmu_unload(&svm->vcpu);
2510  svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
2511  nested_svm_init_mmu_context(&svm->vcpu);
2512  }
2513 
2514  /* Load the nested guest state */
2515  svm->vmcb->save.es = nested_vmcb->save.es;
2516  svm->vmcb->save.cs = nested_vmcb->save.cs;
2517  svm->vmcb->save.ss = nested_vmcb->save.ss;
2518  svm->vmcb->save.ds = nested_vmcb->save.ds;
2519  svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
2520  svm->vmcb->save.idtr = nested_vmcb->save.idtr;
2521  kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
2522  svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
2523  svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
2524  svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
2525  if (npt_enabled) {
2526  svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
2527  svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
2528  } else
2529  (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
2530 
2531  /* Guest paging mode is active - reset mmu */
2532  kvm_mmu_reset_context(&svm->vcpu);
2533 
2534  svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
2535  kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
2536  kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
2537  kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
2538 
2539  /* In case we don't even reach vcpu_run, the fields are not updated */
2540  svm->vmcb->save.rax = nested_vmcb->save.rax;
2541  svm->vmcb->save.rsp = nested_vmcb->save.rsp;
2542  svm->vmcb->save.rip = nested_vmcb->save.rip;
2543  svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
2544  svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
2545  svm->vmcb->save.cpl = nested_vmcb->save.cpl;
2546 
2547  svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
2548  svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
2549 
2550  /* cache intercepts */
2551  svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
2552  svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
2553  svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
2554  svm->nested.intercept = nested_vmcb->control.intercept;
2555 
2556  svm_flush_tlb(&svm->vcpu);
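 /*
  * Run the nested guest with V_INTR_MASKING forced on so that physical
  * interrupts always exit to the host; HF_VINTR_MASK records whether the
  * L1 guest itself requested virtual interrupt masking.
  */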
2557  svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
2558  if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
2559  svm->vcpu.arch.hflags |= HF_VINTR_MASK;
2560  else
2561  svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
2562 
2563  if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
2564  /* We only want the cr8 intercept bits of the guest */
2565  clr_cr_intercept(svm, INTERCEPT_CR8_READ);
2566  clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
2567  }
2568 
2569  /* We don't want to see VMMCALLs from a nested guest */
2570  clr_intercept(svm, INTERCEPT_VMMCALL);
2571 
2572  svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
2573  svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
2574  svm->vmcb->control.int_state = nested_vmcb->control.int_state;
2575  svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
2576  svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
2577  svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
2578 
2579  nested_svm_unmap(page);
2580 
2581  /* Enter Guest-Mode */
2582  enter_guest_mode(&svm->vcpu);
2583 
2584  /*
2585  * Merge guest and host intercepts - must be called with vcpu in
2586  * guest-mode to take effect here
2587  */
2588  recalc_intercepts(svm);
2589 
2590  svm->nested.vmcb = vmcb_gpa;
2591 
2592  enable_gif(svm);
2593 
2594  mark_all_dirty(svm->vmcb);
2595 
2596  return true;
2597 }
2598 
2599 static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
2600 {
2601  to_vmcb->save.fs = from_vmcb->save.fs;
2602  to_vmcb->save.gs = from_vmcb->save.gs;
2603  to_vmcb->save.tr = from_vmcb->save.tr;
2604  to_vmcb->save.ldtr = from_vmcb->save.ldtr;
2605  to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
2606  to_vmcb->save.star = from_vmcb->save.star;
2607  to_vmcb->save.lstar = from_vmcb->save.lstar;
2608  to_vmcb->save.cstar = from_vmcb->save.cstar;
2609  to_vmcb->save.sfmask = from_vmcb->save.sfmask;
2610  to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
2611  to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
2612  to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
2613 }
2614 
2615 static int vmload_interception(struct vcpu_svm *svm)
2616 {
2617  struct vmcb *nested_vmcb;
2618  struct page *page;
2619 
2620  if (nested_svm_check_permissions(svm))
2621  return 1;
2622 
2623  nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2624  if (!nested_vmcb)
2625  return 1;
2626 
2627  svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2628  skip_emulated_instruction(&svm->vcpu);
2629 
2630  nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
2631  nested_svm_unmap(page);
2632 
2633  return 1;
2634 }
2635 
2636 static int vmsave_interception(struct vcpu_svm *svm)
2637 {
2638  struct vmcb *nested_vmcb;
2639  struct page *page;
2640 
2641  if (nested_svm_check_permissions(svm))
2642  return 1;
2643 
2644  nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2645  if (!nested_vmcb)
2646  return 1;
2647 
2648  svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2649  skip_emulated_instruction(&svm->vcpu);
2650 
2651  nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
2652  nested_svm_unmap(page);
2653 
2654  return 1;
2655 }
2656 
2657 static int vmrun_interception(struct vcpu_svm *svm)
2658 {
2659  if (nested_svm_check_permissions(svm))
2660  return 1;
2661 
2662  /* Save rip after vmrun instruction */
2663  kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
2664 
2665  if (!nested_svm_vmrun(svm))
2666  return 1;
2667 
2668  if (!nested_svm_vmrun_msrpm(svm))
2669  goto failed;
2670 
2671  return 1;
2672 
2673 failed:
2674 
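 /* Reflect the failed VMRUN back to the L1 guest as a #VMEXIT with SVM_EXIT_ERR. */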
2675  svm->vmcb->control.exit_code = SVM_EXIT_ERR;
2676  svm->vmcb->control.exit_code_hi = 0;
2677  svm->vmcb->control.exit_info_1 = 0;
2678  svm->vmcb->control.exit_info_2 = 0;
2679 
2680  nested_svm_vmexit(svm);
2681 
2682  return 1;
2683 }
2684 
2685 static int stgi_interception(struct vcpu_svm *svm)
2686 {
2687  if (nested_svm_check_permissions(svm))
2688  return 1;
2689 
2690  svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2691  skip_emulated_instruction(&svm->vcpu);
2692  kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
2693 
2694  enable_gif(svm);
2695 
2696  return 1;
2697 }
2698 
2699 static int clgi_interception(struct vcpu_svm *svm)
2700 {
2701  if (nested_svm_check_permissions(svm))
2702  return 1;
2703 
2704  svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2705  skip_emulated_instruction(&svm->vcpu);
2706 
2707  disable_gif(svm);
2708 
2709  /* After a CLGI no interrupts should come */
2710  svm_clear_vintr(svm);
2711  svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
2712 
2713  mark_dirty(svm->vmcb, VMCB_INTR);
2714 
2715  return 1;
2716 }
2717 
2718 static int invlpga_interception(struct vcpu_svm *svm)
2719 {
2720  struct kvm_vcpu *vcpu = &svm->vcpu;
2721 
2722  trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
2723  vcpu->arch.regs[VCPU_REGS_RAX]);
2724 
2725  /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
2726  kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
2727 
2728  svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2729  skip_emulated_instruction(&svm->vcpu);
2730  return 1;
2731 }
2732 
2733 static int skinit_interception(struct vcpu_svm *svm)
2734 {
2735  trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
2736 
2737  kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2738  return 1;
2739 }
2740 
2741 static int xsetbv_interception(struct vcpu_svm *svm)
2742 {
2743  u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
2744  u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
2745 
2746  if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
2747  svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2748  skip_emulated_instruction(&svm->vcpu);
2749  }
2750 
2751  return 1;
2752 }
2753 
2754 static int invalid_op_interception(struct vcpu_svm *svm)
2755 {
2756  kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2757  return 1;
2758 }
2759 
2760 static int task_switch_interception(struct vcpu_svm *svm)
2761 {
2762  u16 tss_selector;
2763  int reason;
2764  int int_type = svm->vmcb->control.exit_int_info &
2765  SVM_EXITINTINFO_TYPE_MASK;
2766  int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
2767  uint32_t type =
2768  svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
2769  uint32_t idt_v =
2770  svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
2771  bool has_error_code = false;
2772  u32 error_code = 0;
2773 
2774  tss_selector = (u16)svm->vmcb->control.exit_info_1;
2775 
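 /*
  * Work out why the task switch happened: exit_info_2 flags IRET- and
  * far-jump-initiated switches, a valid exit_int_info means the switch
  * came through a task gate, and anything else is treated as a CALL.
  */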
2776  if (svm->vmcb->control.exit_info_2 &
2777  (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
2778  reason = TASK_SWITCH_IRET;
2779  else if (svm->vmcb->control.exit_info_2 &
2780  (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
2781  reason = TASK_SWITCH_JMP;
2782  else if (idt_v)
2783  reason = TASK_SWITCH_GATE;
2784  else
2785  reason = TASK_SWITCH_CALL;
2786 
2787  if (reason == TASK_SWITCH_GATE) {
2788  switch (type) {
2789  case SVM_EXITINTINFO_TYPE_NMI:
2790  svm->vcpu.arch.nmi_injected = false;
2791  break;
2792  case SVM_EXITINTINFO_TYPE_EXEPT:
2793  if (svm->vmcb->control.exit_info_2 &
2794  (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
2795  has_error_code = true;
2796  error_code =
2797  (u32)svm->vmcb->control.exit_info_2;
2798  }
2799  kvm_clear_exception_queue(&svm->vcpu);
2800  break;
2801  case SVM_EXITINTINFO_TYPE_INTR:
2802  kvm_clear_interrupt_queue(&svm->vcpu);
2803  break;
2804  default:
2805  break;
2806  }
2807  }
2808 
2809  if (reason != TASK_SWITCH_GATE ||
2810  int_type == SVM_EXITINTINFO_TYPE_SOFT ||
2811  (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
2812  (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
2813  skip_emulated_instruction(&svm->vcpu);
2814 
2815  if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
2816  int_vec = -1;
2817 
2818  if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
2819  has_error_code, error_code) == EMULATE_FAIL) {
2820  svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
2821  svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
2822  svm->vcpu.run->internal.ndata = 0;
2823  return 0;
2824  }
2825  return 1;
2826 }
2827 
2828 static int cpuid_interception(struct vcpu_svm *svm)
2829 {
2830  svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2831  kvm_emulate_cpuid(&svm->vcpu);
2832  return 1;
2833 }
2834 
2835 static int iret_interception(struct vcpu_svm *svm)
2836 {
2837  ++svm->vcpu.stat.nmi_window_exits;
2838  clr_intercept(svm, INTERCEPT_IRET);
2839  svm->vcpu.arch.hflags |= HF_IRET_MASK;
2840  svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
2841  return 1;
2842 }
2843 
2844 static int invlpg_interception(struct vcpu_svm *svm)
2845 {
2846  if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2847  return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
2848 
2849  kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
2850  skip_emulated_instruction(&svm->vcpu);
2851  return 1;
2852 }
2853 
2854 static int emulate_on_interception(struct vcpu_svm *svm)
2855 {
2856  return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
2857 }
2858 
2859 static int rdpmc_interception(struct vcpu_svm *svm)
2860 {
2861  int err;
2862 
2863  if (!static_cpu_has(X86_FEATURE_NRIPS))
2864  return emulate_on_interception(svm);
2865 
2866  err = kvm_rdpmc(&svm->vcpu);
2867  kvm_complete_insn_gp(&svm->vcpu, err);
2868 
2869  return 1;
2870 }
2871 
2872 bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
2873 {
2874  unsigned long cr0 = svm->vcpu.arch.cr0;
2875  bool ret = false;
2876  u64 intercept;
2877 
2878  intercept = svm->nested.intercept;
2879 
2880  if (!is_guest_mode(&svm->vcpu) ||
2881  (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
2882  return false;
2883 
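 /*
  * The CR0_SEL_WRITE intercept only fires when bits other than TS and MP
  * change, so ignore those bits when comparing the old and new values.
  */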
2884  cr0 &= ~SVM_CR0_SELECTIVE_MASK;
2885  val &= ~SVM_CR0_SELECTIVE_MASK;
2886 
2887  if (cr0 ^ val) {
2888  svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
2889  ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
2890  }
2891 
2892  return ret;
2893 }
2894 
2895 #define CR_VALID (1ULL << 63)
2896 
2897 static int cr_interception(struct vcpu_svm *svm)
2898 {
2899  int reg, cr;
2900  unsigned long val;
2901  int err;
2902 
2903  if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2904  return emulate_on_interception(svm);
2905 
2906  if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
2907  return emulate_on_interception(svm);
2908 
2909  reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2910  cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
2911 
2912  err = 0;
2913  if (cr >= 16) { /* mov to cr */
2914  cr -= 16;
2915  val = kvm_register_read(&svm->vcpu, reg);
2916  switch (cr) {
2917  case 0:
2918  if (!check_selective_cr0_intercepted(svm, val))
2919  err = kvm_set_cr0(&svm->vcpu, val);
2920  else
2921  return 1;
2922 
2923  break;
2924  case 3:
2925  err = kvm_set_cr3(&svm->vcpu, val);
2926  break;
2927  case 4:
2928  err = kvm_set_cr4(&svm->vcpu, val);
2929  break;
2930  case 8:
2931  err = kvm_set_cr8(&svm->vcpu, val);
2932  break;
2933  default:
2934  WARN(1, "unhandled write to CR%d", cr);
2935  kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2936  return 1;
2937  }
2938  } else { /* mov from cr */
2939  switch (cr) {
2940  case 0:
2941  val = kvm_read_cr0(&svm->vcpu);
2942  break;
2943  case 2:
2944  val = svm->vcpu.arch.cr2;
2945  break;
2946  case 3:
2947  val = kvm_read_cr3(&svm->vcpu);
2948  break;
2949  case 4:
2950  val = kvm_read_cr4(&svm->vcpu);
2951  break;
2952  case 8:
2953  val = kvm_get_cr8(&svm->vcpu);
2954  break;
2955  default:
2956  WARN(1, "unhandled read from CR%d", cr);
2957  kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2958  return 1;
2959  }
2960  kvm_register_write(&svm->vcpu, reg, val);
2961  }
2962  kvm_complete_insn_gp(&svm->vcpu, err);
2963 
2964  return 1;
2965 }
2966 
2967 static int dr_interception(struct vcpu_svm *svm)
2968 {
2969  int reg, dr;
2970  unsigned long val;
2971  int err;
2972 
2973  if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
2974  return emulate_on_interception(svm);
2975 
2976  reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2977  dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
2978 
2979  if (dr >= 16) { /* mov to DRn */
2980  val = kvm_register_read(&svm->vcpu, reg);
2981  kvm_set_dr(&svm->vcpu, dr - 16, val);
2982  } else {
2983  err = kvm_get_dr(&svm->vcpu, dr, &val);
2984  if (!err)
2985  kvm_register_write(&svm->vcpu, reg, val);
2986  }
2987 
2988  skip_emulated_instruction(&svm->vcpu);
2989 
2990  return 1;
2991 }
2992 
2993 static int cr8_write_interception(struct vcpu_svm *svm)
2994 {
2995  struct kvm_run *kvm_run = svm->vcpu.run;
2996  int r;
2997 
2998  u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
2999  /* instruction emulation calls kvm_set_cr8() */
3000  r = cr_interception(svm);
3001  if (irqchip_in_kernel(svm->vcpu.kvm)) {
3002  clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3003  return r;
3004  }
3005  if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
3006  return r;
3007  kvm_run->exit_reason = KVM_EXIT_SET_TPR;
3008  return 0;
3009 }
3010 
3011 static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu)
3012 {
3013  struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
3014  return vmcb->control.tsc_offset +
3015  svm_scale_tsc(vcpu, native_read_tsc());
3016 }
3017 
3018 static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
3019 {
3020  struct vcpu_svm *svm = to_svm(vcpu);
3021 
3022  switch (ecx) {
3023  case MSR_IA32_TSC: {
3024  *data = svm->vmcb->control.tsc_offset +
3025  svm_scale_tsc(vcpu, native_read_tsc());
3026 
3027  break;
3028  }
3029  case MSR_STAR:
3030  *data = svm->vmcb->save.star;
3031  break;
3032 #ifdef CONFIG_X86_64
3033  case MSR_LSTAR:
3034  *data = svm->vmcb->save.lstar;
3035  break;
3036  case MSR_CSTAR:
3037  *data = svm->vmcb->save.cstar;
3038  break;
3039  case MSR_KERNEL_GS_BASE:
3040  *data = svm->vmcb->save.kernel_gs_base;
3041  break;
3042  case MSR_SYSCALL_MASK:
3043  *data = svm->vmcb->save.sfmask;
3044  break;
3045 #endif
3046  case MSR_IA32_SYSENTER_CS:
3047  *data = svm->vmcb->save.sysenter_cs;
3048  break;
3049  case MSR_IA32_SYSENTER_EIP:
3050  *data = svm->sysenter_eip;
3051  break;
3052  case MSR_IA32_SYSENTER_ESP:
3053  *data = svm->sysenter_esp;
3054  break;
3055  /*
3056  * Nobody will change the following 5 values in the VMCB so we can
3057  * safely return them on rdmsr. They will always be 0 until LBRV is
3058  * implemented.
3059  */
3060  case MSR_IA32_DEBUGCTLMSR:
3061  *data = svm->vmcb->save.dbgctl;
3062  break;
3063  case MSR_IA32_LASTBRANCHFROMIP:
3064  *data = svm->vmcb->save.br_from;
3065  break;
3066  case MSR_IA32_LASTBRANCHTOIP:
3067  *data = svm->vmcb->save.br_to;
3068  break;
3069  case MSR_IA32_LASTINTFROMIP:
3070  *data = svm->vmcb->save.last_excp_from;
3071  break;
3072  case MSR_IA32_LASTINTTOIP:
3073  *data = svm->vmcb->save.last_excp_to;
3074  break;
3075  case MSR_VM_HSAVE_PA:
3076  *data = svm->nested.hsave_msr;
3077  break;
3078  case MSR_VM_CR:
3079  *data = svm->nested.vm_cr_msr;
3080  break;
3081  case MSR_IA32_UCODE_REV:
3082  *data = 0x01000065;
3083  break;
3084  default:
3085  return kvm_get_msr_common(vcpu, ecx, data);
3086  }
3087  return 0;
3088 }
3089 
3090 static int rdmsr_interception(struct vcpu_svm *svm)
3091 {
3092  u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
3093  u64 data;
3094 
3095  if (svm_get_msr(&svm->vcpu, ecx, &data)) {
3096  trace_kvm_msr_read_ex(ecx);
3097  kvm_inject_gp(&svm->vcpu, 0);
3098  } else {
3099  trace_kvm_msr_read(ecx, data);
3100 
3101  svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
3102  svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
3103  svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
3104  skip_emulated_instruction(&svm->vcpu);
3105  }
3106  return 1;
3107 }
3108 
3109 static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
3110 {
3111  struct vcpu_svm *svm = to_svm(vcpu);
3112  int svm_dis, chg_mask;
3113 
3114  if (data & ~SVM_VM_CR_VALID_MASK)
3115  return 1;
3116 
3117  chg_mask = SVM_VM_CR_VALID_MASK;
3118 
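 /* Once SVMDIS has been latched, the SVM_LOCK and SVMDIS bits become read-only. */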
3119  if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
3120  chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
3121 
3122  svm->nested.vm_cr_msr &= ~chg_mask;
3123  svm->nested.vm_cr_msr |= (data & chg_mask);
3124 
3125  svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
3126 
3127  /* check for svm_disable while efer.svme is set */
3128  if (svm_dis && (vcpu->arch.efer & EFER_SVME))
3129  return 1;
3130 
3131  return 0;
3132 }
3133 
3134 static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
3135 {
3136  struct vcpu_svm *svm = to_svm(vcpu);
3137 
3138  switch (ecx) {
3139  case MSR_IA32_TSC:
3140  kvm_write_tsc(vcpu, data);
3141  break;
3142  case MSR_STAR:
3143  svm->vmcb->save.star = data;
3144  break;
3145 #ifdef CONFIG_X86_64
3146  case MSR_LSTAR:
3147  svm->vmcb->save.lstar = data;
3148  break;
3149  case MSR_CSTAR:
3150  svm->vmcb->save.cstar = data;
3151  break;
3152  case MSR_KERNEL_GS_BASE:
3153  svm->vmcb->save.kernel_gs_base = data;
3154  break;
3155  case MSR_SYSCALL_MASK:
3156  svm->vmcb->save.sfmask = data;
3157  break;
3158 #endif
3159  case MSR_IA32_SYSENTER_CS:
3160  svm->vmcb->save.sysenter_cs = data;
3161  break;
3162  case MSR_IA32_SYSENTER_EIP:
3163  svm->sysenter_eip = data;
3164  svm->vmcb->save.sysenter_eip = data;
3165  break;
3166  case MSR_IA32_SYSENTER_ESP:
3167  svm->sysenter_esp = data;
3168  svm->vmcb->save.sysenter_esp = data;
3169  break;
3170  case MSR_IA32_DEBUGCTLMSR:
3171  if (!boot_cpu_has(X86_FEATURE_LBRV)) {
3172  vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
3173  __func__, data);
3174  break;
3175  }
3176  if (data & DEBUGCTL_RESERVED_BITS)
3177  return 1;
3178 
3179  svm->vmcb->save.dbgctl = data;
3180  mark_dirty(svm->vmcb, VMCB_LBR);
3181  if (data & (1ULL<<0))
3182  svm_enable_lbrv(svm);
3183  else
3184  svm_disable_lbrv(svm);
3185  break;
3186  case MSR_VM_HSAVE_PA:
3187  svm->nested.hsave_msr = data;
3188  break;
3189  case MSR_VM_CR:
3190  return svm_set_vm_cr(vcpu, data);
3191  case MSR_VM_IGNNE:
3192  vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
3193  break;
3194  default:
3195  return kvm_set_msr_common(vcpu, ecx, data);
3196  }
3197  return 0;
3198 }
3199 
3200 static int wrmsr_interception(struct vcpu_svm *svm)
3201 {
3202  u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
3203  u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
3204  | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
3205 
3206 
3207  svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
3208  if (svm_set_msr(&svm->vcpu, ecx, data)) {
3209  trace_kvm_msr_write_ex(ecx, data);
3210  kvm_inject_gp(&svm->vcpu, 0);
3211  } else {
3212  trace_kvm_msr_write(ecx, data);
3213  skip_emulated_instruction(&svm->vcpu);
3214  }
3215  return 1;
3216 }
3217 
3218 static int msr_interception(struct vcpu_svm *svm)
3219 {
3220  if (svm->vmcb->control.exit_info_1)
3221  return wrmsr_interception(svm);
3222  else
3223  return rdmsr_interception(svm);
3224 }
3225 
3226 static int interrupt_window_interception(struct vcpu_svm *svm)
3227 {
3228  struct kvm_run *kvm_run = svm->vcpu.run;
3229 
3230  kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3231  svm_clear_vintr(svm);
3232  svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
3233  mark_dirty(svm->vmcb, VMCB_INTR);
3234  ++svm->vcpu.stat.irq_window_exits;
3235  /*
3236  * If user space is waiting to inject interrupts, exit as soon as
3237  * possible
3238  */
3239  if (!irqchip_in_kernel(svm->vcpu.kvm) &&
3240  kvm_run->request_interrupt_window &&
3241  !kvm_cpu_has_interrupt(&svm->vcpu)) {
3242  kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
3243  return 0;
3244  }
3245 
3246  return 1;
3247 }
3248 
3249 static int pause_interception(struct vcpu_svm *svm)
3250 {
3251  kvm_vcpu_on_spin(&(svm->vcpu));
3252  return 1;
3253 }
3254 
3255 static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
3256  [SVM_EXIT_READ_CR0] = cr_interception,
3257  [SVM_EXIT_READ_CR3] = cr_interception,
3258  [SVM_EXIT_READ_CR4] = cr_interception,
3259  [SVM_EXIT_READ_CR8] = cr_interception,
3260  [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception,
3261  [SVM_EXIT_WRITE_CR0] = cr_interception,
3262  [SVM_EXIT_WRITE_CR3] = cr_interception,
3263  [SVM_EXIT_WRITE_CR4] = cr_interception,
3264  [SVM_EXIT_WRITE_CR8] = cr8_write_interception,
3265  [SVM_EXIT_READ_DR0] = dr_interception,
3266  [SVM_EXIT_READ_DR1] = dr_interception,
3267  [SVM_EXIT_READ_DR2] = dr_interception,
3268  [SVM_EXIT_READ_DR3] = dr_interception,
3269  [SVM_EXIT_READ_DR4] = dr_interception,
3270  [SVM_EXIT_READ_DR5] = dr_interception,
3271  [SVM_EXIT_READ_DR6] = dr_interception,
3272  [SVM_EXIT_READ_DR7] = dr_interception,
3273  [SVM_EXIT_WRITE_DR0] = dr_interception,
3274  [SVM_EXIT_WRITE_DR1] = dr_interception,
3275  [SVM_EXIT_WRITE_DR2] = dr_interception,
3276  [SVM_EXIT_WRITE_DR3] = dr_interception,
3277  [SVM_EXIT_WRITE_DR4] = dr_interception,
3278  [SVM_EXIT_WRITE_DR5] = dr_interception,
3279  [SVM_EXIT_WRITE_DR6] = dr_interception,
3280  [SVM_EXIT_WRITE_DR7] = dr_interception,
3281  [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
3282  [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
3283  [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
3284  [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
3285  [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
3286  [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
3287  [SVM_EXIT_INTR] = intr_interception,
3288  [SVM_EXIT_NMI] = nmi_interception,
3289  [SVM_EXIT_SMI] = nop_on_interception,
3290  [SVM_EXIT_INIT] = nop_on_interception,
3291  [SVM_EXIT_VINTR] = interrupt_window_interception,
3292  [SVM_EXIT_RDPMC] = rdpmc_interception,
3293  [SVM_EXIT_CPUID] = cpuid_interception,
3294  [SVM_EXIT_IRET] = iret_interception,
3295  [SVM_EXIT_INVD] = emulate_on_interception,
3296  [SVM_EXIT_PAUSE] = pause_interception,
3297  [SVM_EXIT_HLT] = halt_interception,
3298  [SVM_EXIT_INVLPG] = invlpg_interception,
3299  [SVM_EXIT_INVLPGA] = invlpga_interception,
3300  [SVM_EXIT_IOIO] = io_interception,
3301  [SVM_EXIT_MSR] = msr_interception,
3302  [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
3303  [SVM_EXIT_SHUTDOWN] = shutdown_interception,
3304  [SVM_EXIT_VMRUN] = vmrun_interception,
3305  [SVM_EXIT_VMMCALL] = vmmcall_interception,
3306  [SVM_EXIT_VMLOAD] = vmload_interception,
3307  [SVM_EXIT_VMSAVE] = vmsave_interception,
3308  [SVM_EXIT_STGI] = stgi_interception,
3309  [SVM_EXIT_CLGI] = clgi_interception,
3310  [SVM_EXIT_SKINIT] = skinit_interception,
3311  [SVM_EXIT_WBINVD] = emulate_on_interception,
3312  [SVM_EXIT_MONITOR] = invalid_op_interception,
3313  [SVM_EXIT_MWAIT] = invalid_op_interception,
3314  [SVM_EXIT_XSETBV] = xsetbv_interception,
3315  [SVM_EXIT_NPF] = pf_interception,
3316 };
3317 
3318 static void dump_vmcb(struct kvm_vcpu *vcpu)
3319 {
3320  struct vcpu_svm *svm = to_svm(vcpu);
3321  struct vmcb_control_area *control = &svm->vmcb->control;
3322  struct vmcb_save_area *save = &svm->vmcb->save;
3323 
3324  pr_err("VMCB Control Area:\n");
3325  pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
3326  pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
3327  pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
3328  pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
3329  pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
3330  pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
3331  pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
3332  pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
3333  pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
3334  pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
3335  pr_err("%-20s%d\n", "asid:", control->asid);
3336  pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
3337  pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
3338  pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
3339  pr_err("%-20s%08x\n", "int_state:", control->int_state);
3340  pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
3341  pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
3342  pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
3343  pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
3344  pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
3345  pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
3346  pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
3347  pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
3348  pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
3349  pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
3350  pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
3351  pr_err("VMCB State Save Area:\n");
3352  pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3353  "es:",
3354  save->es.selector, save->es.attrib,
3355  save->es.limit, save->es.base);
3356  pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3357  "cs:",
3358  save->cs.selector, save->cs.attrib,
3359  save->cs.limit, save->cs.base);
3360  pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3361  "ss:",
3362  save->ss.selector, save->ss.attrib,
3363  save->ss.limit, save->ss.base);
3364  pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3365  "ds:",
3366  save->ds.selector, save->ds.attrib,
3367  save->ds.limit, save->ds.base);
3368  pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3369  "fs:",
3370  save->fs.selector, save->fs.attrib,
3371  save->fs.limit, save->fs.base);
3372  pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3373  "gs:",
3374  save->gs.selector, save->gs.attrib,
3375  save->gs.limit, save->gs.base);
3376  pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3377  "gdtr:",
3378  save->gdtr.selector, save->gdtr.attrib,
3379  save->gdtr.limit, save->gdtr.base);
3380  pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3381  "ldtr:",
3382  save->ldtr.selector, save->ldtr.attrib,
3383  save->ldtr.limit, save->ldtr.base);
3384  pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3385  "idtr:",
3386  save->idtr.selector, save->idtr.attrib,
3387  save->idtr.limit, save->idtr.base);
3388  pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3389  "tr:",
3390  save->tr.selector, save->tr.attrib,
3391  save->tr.limit, save->tr.base);
3392  pr_err("cpl: %d efer: %016llx\n",
3393  save->cpl, save->efer);
3394  pr_err("%-15s %016llx %-13s %016llx\n",
3395  "cr0:", save->cr0, "cr2:", save->cr2);
3396  pr_err("%-15s %016llx %-13s %016llx\n",
3397  "cr3:", save->cr3, "cr4:", save->cr4);
3398  pr_err("%-15s %016llx %-13s %016llx\n",
3399  "dr6:", save->dr6, "dr7:", save->dr7);
3400  pr_err("%-15s %016llx %-13s %016llx\n",
3401  "rip:", save->rip, "rflags:", save->rflags);
3402  pr_err("%-15s %016llx %-13s %016llx\n",
3403  "rsp:", save->rsp, "rax:", save->rax);
3404  pr_err("%-15s %016llx %-13s %016llx\n",
3405  "star:", save->star, "lstar:", save->lstar);
3406  pr_err("%-15s %016llx %-13s %016llx\n",
3407  "cstar:", save->cstar, "sfmask:", save->sfmask);
3408  pr_err("%-15s %016llx %-13s %016llx\n",
3409  "kernel_gs_base:", save->kernel_gs_base,
3410  "sysenter_cs:", save->sysenter_cs);
3411  pr_err("%-15s %016llx %-13s %016llx\n",
3412  "sysenter_esp:", save->sysenter_esp,
3413  "sysenter_eip:", save->sysenter_eip);
3414  pr_err("%-15s %016llx %-13s %016llx\n",
3415  "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
3416  pr_err("%-15s %016llx %-13s %016llx\n",
3417  "br_from:", save->br_from, "br_to:", save->br_to);
3418  pr_err("%-15s %016llx %-13s %016llx\n",
3419  "excp_from:", save->last_excp_from,
3420  "excp_to:", save->last_excp_to);
3421 }
3422 
3423 static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
3424 {
3425  struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
3426 
3427  *info1 = control->exit_info_1;
3428  *info2 = control->exit_info_2;
3429 }
3430 
3431 static int handle_exit(struct kvm_vcpu *vcpu)
3432 {
3433  struct vcpu_svm *svm = to_svm(vcpu);
3434  struct kvm_run *kvm_run = vcpu->run;
3435  u32 exit_code = svm->vmcb->control.exit_code;
3436 
3437  if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
3438  vcpu->arch.cr0 = svm->vmcb->save.cr0;
3439  if (npt_enabled)
3440  vcpu->arch.cr3 = svm->vmcb->save.cr3;
3441 
3442  if (unlikely(svm->nested.exit_required)) {
3443  nested_svm_vmexit(svm);
3444  svm->nested.exit_required = false;
3445 
3446  return 1;
3447  }
3448 
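 /*
  * For a nested guest, first let nested_svm_exit_special() pick out the
  * exits the host must always handle itself; everything else is checked
  * against the L1 intercept bitmaps and, if L1 wanted the intercept,
  * reflected into L1 as an emulated #VMEXIT.
  */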
3449  if (is_guest_mode(vcpu)) {
3450  int vmexit;
3451 
3452  trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
3453  svm->vmcb->control.exit_info_1,
3454  svm->vmcb->control.exit_info_2,
3455  svm->vmcb->control.exit_int_info,
3456  svm->vmcb->control.exit_int_info_err,
3457  KVM_ISA_SVM);
3458 
3459  vmexit = nested_svm_exit_special(svm);
3460 
3461  if (vmexit == NESTED_EXIT_CONTINUE)
3462  vmexit = nested_svm_exit_handled(svm);
3463 
3464  if (vmexit == NESTED_EXIT_DONE)
3465  return 1;
3466  }
3467 
3468  svm_complete_interrupts(svm);
3469 
3470  if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
3471  kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
3472  kvm_run->fail_entry.hardware_entry_failure_reason
3473  = svm->vmcb->control.exit_code;
3474  pr_err("KVM: FAILED VMRUN WITH VMCB:\n");
3475  dump_vmcb(vcpu);
3476  return 0;
3477  }
3478 
3479  if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
3480  exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
3481  exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
3482  exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
3483  printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
3484  "exit_code 0x%x\n",
3485  __func__, svm->vmcb->control.exit_int_info,
3486  exit_code);
3487 
3488  if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
3489  || !svm_exit_handlers[exit_code]) {
3490  kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
3491  kvm_run->hw.hardware_exit_reason = exit_code;
3492  return 0;
3493  }
3494 
3495  return svm_exit_handlers[exit_code](svm);
3496 }
3497 
3498 static void reload_tss(struct kvm_vcpu *vcpu)
3499 {
3500  int cpu = raw_smp_processor_id();
3501 
3502  struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
3503  sd->tss_desc->type = 9; /* available 32/64-bit TSS */
3504  load_TR_desc();
3505 }
3506 
3507 static void pre_svm_run(struct vcpu_svm *svm)
3508 {
3509  int cpu = raw_smp_processor_id();
3510 
3511  struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
3512 
3513  /* FIXME: handle wraparound of asid_generation */
3514  if (svm->asid_generation != sd->asid_generation)
3515  new_asid(svm, sd);
3516 }
3517 
3518 static void svm_inject_nmi(struct kvm_vcpu *vcpu)
3519 {
3520  struct vcpu_svm *svm = to_svm(vcpu);
3521 
3522  svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
3523  vcpu->arch.hflags |= HF_NMI_MASK;
3524  set_intercept(svm, INTERCEPT_IRET);
3525  ++vcpu->stat.nmi_injections;
3526 }
3527 
3528 static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
3529 {
3530  struct vmcb_control_area *control;
3531 
3532  control = &svm->vmcb->control;
3533  control->int_vector = irq;
3534  control->int_ctl &= ~V_INTR_PRIO_MASK;
3535  control->int_ctl |= V_IRQ_MASK |
3536  ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
3537  mark_dirty(svm->vmcb, VMCB_INTR);
3538 }
3539 
3540 static void svm_set_irq(struct kvm_vcpu *vcpu)
3541 {
3542  struct vcpu_svm *svm = to_svm(vcpu);
3543 
3544  BUG_ON(!(gif_set(svm)));
3545 
3546  trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
3547  ++vcpu->stat.irq_injections;
3548 
3549  svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
3550  SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
3551 }
3552 
3553 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3554 {
3555  struct vcpu_svm *svm = to_svm(vcpu);
3556 
3557  if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3558  return;
3559 
3560  if (irr == -1)
3561  return;
3562 
3563  if (tpr >= irr)
3564  set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3565 }
3566 
3567 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
3568 {
3569  struct vcpu_svm *svm = to_svm(vcpu);
3570  struct vmcb *vmcb = svm->vmcb;
3571  int ret;
3572  ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
3573  !(svm->vcpu.arch.hflags & HF_NMI_MASK);
3574  ret = ret && gif_set(svm) && nested_svm_nmi(svm);
3575 
3576  return ret;
3577 }
3578 
3579 static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
3580 {
3581  struct vcpu_svm *svm = to_svm(vcpu);
3582 
3583  return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
3584 }
3585 
3586 static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
3587 {
3588  struct vcpu_svm *svm = to_svm(vcpu);
3589 
3590  if (masked) {
3591  svm->vcpu.arch.hflags |= HF_NMI_MASK;
3592  set_intercept(svm, INTERCEPT_IRET);
3593  } else {
3594  svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
3595  clr_intercept(svm, INTERCEPT_IRET);
3596  }
3597 }
3598 
3599 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
3600 {
3601  struct vcpu_svm *svm = to_svm(vcpu);
3602  struct vmcb *vmcb = svm->vmcb;
3603  int ret;
3604 
3605  if (!gif_set(svm) ||
3606  (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
3607  return 0;
3608 
3609  ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
3610 
3611  if (is_guest_mode(vcpu))
3612  return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
3613 
3614  return ret;
3615 }
3616 
3617 static void enable_irq_window(struct kvm_vcpu *vcpu)
3618 {
3619  struct vcpu_svm *svm = to_svm(vcpu);
3620 
3621  /*
3622  * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
3623  * 1, because that's a separate STGI/VMRUN intercept. The next time we
3624  * get that intercept, this function will be called again though and
3625  * we'll get the vintr intercept.
3626  */
3627  if (gif_set(svm) && nested_svm_intr(svm)) {
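 /*
  * Arm a dummy virtual interrupt with the VINTR intercept set; the
  * resulting VINTR #VMEXIT fires as soon as the guest can accept
  * interrupts again.
  */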
3628  svm_set_vintr(svm);
3629  svm_inject_irq(svm, 0x0);
3630  }
3631 }
3632 
3633 static void enable_nmi_window(struct kvm_vcpu *vcpu)
3634 {
3635  struct vcpu_svm *svm = to_svm(vcpu);
3636 
3637  if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
3638  == HF_NMI_MASK)
3639  return; /* IRET will cause a vm exit */
3640 
3641  /*
3642  * Something prevents the NMI from being injected. Single step over the possible
3643  * problem (IRET or exception injection or interrupt shadow)
3644  */
3645  svm->nmi_singlestep = true;
3646  svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
3647  update_db_bp_intercept(vcpu);
3648 }
3649 
3650 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
3651 {
3652  return 0;
3653 }
3654 
3655 static void svm_flush_tlb(struct kvm_vcpu *vcpu)
3656 {
3657  struct vcpu_svm *svm = to_svm(vcpu);
3658 
3659  if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
3660  svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
3661  else
3662  svm->asid_generation--;
3663 }
3664 
3665 static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
3666 {
3667 }
3668 
3669 static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
3670 {
3671  struct vcpu_svm *svm = to_svm(vcpu);
3672 
3673  if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3674  return;
3675 
3676  if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
3677  int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
3678  kvm_set_cr8(vcpu, cr8);
3679  }
3680 }
3681 
3682 static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
3683 {
3684  struct vcpu_svm *svm = to_svm(vcpu);
3685  u64 cr8;
3686 
3687  if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3688  return;
3689 
3690  cr8 = kvm_get_cr8(vcpu);
3691  svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
3692  svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
3693 }
3694 
3695 static void svm_complete_interrupts(struct vcpu_svm *svm)
3696 {
3697  u8 vector;
3698  int type;
3699  u32 exitintinfo = svm->vmcb->control.exit_int_info;
3700  unsigned int3_injected = svm->int3_injected;
3701 
3702  svm->int3_injected = 0;
3703 
3704  /*
3705  * If we've made progress since setting HF_IRET_MASK, we've
3706  * executed an IRET and can allow NMI injection.
3707  */
3708  if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
3709  && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
3710  svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
3711  kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3712  }
3713 
3714  svm->vcpu.arch.nmi_injected = false;
3715  kvm_clear_exception_queue(&svm->vcpu);
3716  kvm_clear_interrupt_queue(&svm->vcpu);
3717 
3718  if (!(exitintinfo & SVM_EXITINTINFO_VALID))
3719  return;
3720 
3721  kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3722 
3723  vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
3724  type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
3725 
3726  switch (type) {
3727  case SVM_EXITINTINFO_TYPE_NMI:
3728  svm->vcpu.arch.nmi_injected = true;
3729  break;
3730  case SVM_EXITINTINFO_TYPE_EXEPT:
3731  /*
3732  * In case of software exceptions, do not reinject the vector,
3733  * but re-execute the instruction instead. Rewind RIP first
3734  * if we emulated INT3 before.
3735  */
3736  if (kvm_exception_is_soft(vector)) {
3737  if (vector == BP_VECTOR && int3_injected &&
3738  kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
3739  kvm_rip_write(&svm->vcpu,
3740  kvm_rip_read(&svm->vcpu) -
3741  int3_injected);
3742  break;
3743  }
3744  if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
3745  u32 err = svm->vmcb->control.exit_int_info_err;
3746  kvm_requeue_exception_e(&svm->vcpu, vector, err);
3747 
3748  } else
3749  kvm_requeue_exception(&svm->vcpu, vector);
3750  break;
3751  case SVM_EXITINTINFO_TYPE_INTR:
3752  kvm_queue_interrupt(&svm->vcpu, vector, false);
3753  break;
3754  default:
3755  break;
3756  }
3757 }
3758 
3759 static void svm_cancel_injection(struct kvm_vcpu *vcpu)
3760 {
3761  struct vcpu_svm *svm = to_svm(vcpu);
3762  struct vmcb_control_area *control = &svm->vmcb->control;
3763 
3764  control->exit_int_info = control->event_inj;
3765  control->exit_int_info_err = control->event_inj_err;
3766  control->event_inj = 0;
3767  svm_complete_interrupts(svm);
3768 }
3769 
3770 static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3771 {
3772  struct vcpu_svm *svm = to_svm(vcpu);
3773 
3774  svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
3775  svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
3776  svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
3777 
3778  /*
3779  * A vmexit emulation is required before the vcpu can be executed
3780  * again.
3781  */
3782  if (unlikely(svm->nested.exit_required))
3783  return;
3784 
3785  pre_svm_run(svm);
3786 
3787  sync_lapic_to_cr8(vcpu);
3788 
3789  svm->vmcb->save.cr2 = vcpu->arch.cr2;
3790 
3791  clgi();
3792 
3793  local_irq_enable();
3794 
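 /*
  * VMLOAD pulls the guest's FS/GS/TR/LDTR and syscall/sysenter MSR state
  * in from the VMCB, VMRUN enters the guest and returns here on #VMEXIT,
  * and VMSAVE writes that extra guest state back to the VMCB before the
  * host values are restored below.
  */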
3795  asm volatile (
3796  "push %%" _ASM_BP "; \n\t"
3797  "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
3798  "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
3799  "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
3800  "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
3801  "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
3802  "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
3803 #ifdef CONFIG_X86_64
3804  "mov %c[r8](%[svm]), %%r8 \n\t"
3805  "mov %c[r9](%[svm]), %%r9 \n\t"
3806  "mov %c[r10](%[svm]), %%r10 \n\t"
3807  "mov %c[r11](%[svm]), %%r11 \n\t"
3808  "mov %c[r12](%[svm]), %%r12 \n\t"
3809  "mov %c[r13](%[svm]), %%r13 \n\t"
3810  "mov %c[r14](%[svm]), %%r14 \n\t"
3811  "mov %c[r15](%[svm]), %%r15 \n\t"
3812 #endif
3813 
3814  /* Enter guest mode */
3815  "push %%" _ASM_AX " \n\t"
3816  "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
3817  __ex(SVM_VMLOAD) "\n\t"
3818  __ex(SVM_VMRUN) "\n\t"
3819  __ex(SVM_VMSAVE) "\n\t"
3820  "pop %%" _ASM_AX " \n\t"
3821 
3822  /* Save guest registers, load host registers */
3823  "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
3824  "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
3825  "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
3826  "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
3827  "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
3828  "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
3829 #ifdef CONFIG_X86_64
3830  "mov %%r8, %c[r8](%[svm]) \n\t"
3831  "mov %%r9, %c[r9](%[svm]) \n\t"
3832  "mov %%r10, %c[r10](%[svm]) \n\t"
3833  "mov %%r11, %c[r11](%[svm]) \n\t"
3834  "mov %%r12, %c[r12](%[svm]) \n\t"
3835  "mov %%r13, %c[r13](%[svm]) \n\t"
3836  "mov %%r14, %c[r14](%[svm]) \n\t"
3837  "mov %%r15, %c[r15](%[svm]) \n\t"
3838 #endif
3839  "pop %%" _ASM_BP
3840  :
3841  : [svm]"a"(svm),
3842  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
3843  [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
3844  [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
3845  [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
3846  [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
3847  [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
3848  [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
3849 #ifdef CONFIG_X86_64
3850  , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
3851  [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
3852  [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
3853  [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
3854  [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
3855  [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
3856  [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
3857  [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
3858 #endif
3859  : "cc", "memory"
3860 #ifdef CONFIG_X86_64
3861  , "rbx", "rcx", "rdx", "rsi", "rdi"
3862  , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
3863 #else
3864  , "ebx", "ecx", "edx", "esi", "edi"
3865 #endif
3866  );
3867 
3868 #ifdef CONFIG_X86_64
3869  wrmsrl(MSR_GS_BASE, svm->host.gs_base);
3870 #else
3871  loadsegment(fs, svm->host.fs);
3872 #ifndef CONFIG_X86_32_LAZY_GS
3873  loadsegment(gs, svm->host.gs);
3874 #endif
3875 #endif
3876 
3877  reload_tss(vcpu);
3878 
3879  local_irq_disable();
3880 
3881  vcpu->arch.cr2 = svm->vmcb->save.cr2;
3882  vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
3883  vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
3884  vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
3885 
3886  trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM);
3887 
3888  if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
3889  kvm_before_handle_nmi(&svm->vcpu);
3890 
3891  stgi();
3892 
3893  /* Any pending NMI will happen here */
3894 
3895  if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
3896  kvm_after_handle_nmi(&svm->vcpu);
3897 
3898  sync_cr8_to_lapic(vcpu);
3899 
3900  svm->next_rip = 0;
3901 
3902  svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
3903 
3904  /* if exit due to PF check for async PF */
3905  if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
3906  svm->apf_reason = kvm_read_and_reset_pf_reason();
3907 
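 /*
  * With nested paging the guest can switch CR3 and rewrite its PDPTEs
  * without causing a #VMEXIT, so any PDPTR values KVM has cached may be
  * stale; mark them neither available nor dirty so they are re-read from
  * guest memory the next time they are needed.
  */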
3908  if (npt_enabled) {
3909  vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
3910  vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
3911  }
3912 
3913  /*
3914  * We need to handle MC intercepts here before the vcpu has a chance to
3915  * change the physical cpu
3916  */
3917  if (unlikely(svm->vmcb->control.exit_code ==
3918  SVM_EXIT_EXCP_BASE + MC_VECTOR))
3919  svm_handle_mce(svm);
3920 
3921  mark_all_clean(svm->vmcb);
3922 }
3923 
3924 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
3925 {
3926  struct vcpu_svm *svm = to_svm(vcpu);
3927 
3928  svm->vmcb->save.cr3 = root;
3929  mark_dirty(svm->vmcb, VMCB_CR);
3930  svm_flush_tlb(vcpu);
3931 }
3932 
3933 static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
3934 {
3935  struct vcpu_svm *svm = to_svm(vcpu);
3936 
3937  svm->vmcb->control.nested_cr3 = root;
3938  mark_dirty(svm->vmcb, VMCB_NPT);
3939 
3940  /* Also sync guest cr3 here in case we live migrate */
3941  svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
3942  mark_dirty(svm->vmcb, VMCB_CR);
3943 
3944  svm_flush_tlb(vcpu);
3945 }
3946 
3947 static int is_disabled(void)
3948 {
3949  u64 vm_cr;
3950 
3951  rdmsrl(MSR_VM_CR, vm_cr);
3952  if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
3953  return 1;
3954 
3955  return 0;
3956 }
3957 
3958 static void
3959 svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
3960 {
3961  /*
3962  * Patch in the VMMCALL instruction (opcode bytes 0f 01 d9):
3963  */
3964  hypercall[0] = 0x0f;
3965  hypercall[1] = 0x01;
3966  hypercall[2] = 0xd9;
3967 }
3968 
3969 static void svm_check_processor_compat(void *rtn)
3970 {
3971  *(int *)rtn = 0;
3972 }
3973 
3974 static bool svm_cpu_has_accelerated_tpr(void)
3975 {
3976  return false;
3977 }
3978 
3979 static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3980 {
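 /*
  * Unlike EPT on Intel, NPT page-table entries carry no guest memory-type
  * field for KVM to supply, so there are no MT bits to return here.
  */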
3981  return 0;
3982 }
3983 
3984 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
3985 {
3986 }
3987 
3988 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
3989 {
3990  switch (func) {
3991  case 0x80000001:
3992  if (nested)
3993  entry->ecx |= (1 << 2); /* Set SVM bit */
3994  break;
3995  case 0x8000000A:
3996  entry->eax = 1; /* SVM revision 1 */
3997  entry->ebx = 8; /* Let's support 8 ASIDs in case we add proper
3998  ASID emulation to nested SVM */
3999  entry->ecx = 0; /* Reserved */
4000  entry->edx = 0; /* By default, do not support any
4001  additional features */
4002 
4003  /* Support next_rip if host supports it */
4004  if (boot_cpu_has(X86_FEATURE_NRIPS))
4005  entry->edx |= SVM_FEATURE_NRIP;
4006 
4007  /* Support NPT for the guest if enabled */
4008  if (npt_enabled)
4009  entry->edx |= SVM_FEATURE_NPT;
4010 
4011  break;
4012  }
4013 }
4014 
4015 static int svm_get_lpage_level(void)
4016 {
4017  return PT_PDPE_LEVEL;
4018 }
4019 
4020 static bool svm_rdtscp_supported(void)
4021 {
4022  return false;
4023 }
4024 
4025 static bool svm_invpcid_supported(void)
4026 {
4027  return false;
4028 }
4029 
4030 static bool svm_has_wbinvd_exit(void)
4031 {
4032  return true;
4033 }
4034 
4035 static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
4036 {
4037  struct vcpu_svm *svm = to_svm(vcpu);
4038 
4039  set_exception_intercept(svm, NM_VECTOR);
4040  update_cr0_intercept(svm);
4041 }
4042 
4043 #define PRE_EX(exit) { .exit_code = (exit), \
4044  .stage = X86_ICPT_PRE_EXCEPT, }
4045 #define POST_EX(exit) { .exit_code = (exit), \
4046  .stage = X86_ICPT_POST_EXCEPT, }
4047 #define POST_MEM(exit) { .exit_code = (exit), \
4048  .stage = X86_ICPT_POST_MEMACCESS, }
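 /*
  * Each x86_intercept_map[] entry pairs an emulator intercept id with the
  * SVM exit code to report and the stage at which to check it: before the
  * instruction's own exception checks (PRE_EX), after those checks
  * (POST_EX), or after its memory operands have been accessed (POST_MEM).
  * An entry of the form (illustrative shape only):
  *
  *	[x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR),
  *
  * would map the emulator's WRMSR intercept to the SVM MSR exit, checked
  * after the exception checks.
  */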
4049 
4050 static const struct __x86_intercept {
4051  u32 exit_code;
4052  enum x86_intercept_stage stage;
4053 } x86_intercept_map[] = {
4100 };
4101 
4102 #undef PRE_EX
4103 #undef POST_EX
4104 #undef POST_MEM
4105 
4106 static int svm_check_intercept(struct kvm_vcpu *vcpu,
4107  struct x86_instruction_info *info,
4108  enum x86_intercept_stage stage)
4109 {
4110  struct vcpu_svm *svm = to_svm(vcpu);
4111  int vmexit, ret = X86EMUL_CONTINUE;
4112  struct __x86_intercept icpt_info;
4113  struct vmcb *vmcb = svm->vmcb;
4114 
4115  if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
4116  goto out;
4117 
4118  icpt_info = x86_intercept_map[info->intercept];
4119 
4120  if (stage != icpt_info.stage)
4121  goto out;
4122 
4123  switch (icpt_info.exit_code) {
4124  case SVM_EXIT_READ_CR0:
4125  if (info->intercept == x86_intercept_cr_read)
4126  icpt_info.exit_code += info->modrm_reg;
4127  break;
4128  case SVM_EXIT_WRITE_CR0: {
4129  unsigned long cr0, val;
4130  u64 intercept;
4131 
4132  if (info->intercept == x86_intercept_cr_write)
4133  icpt_info.exit_code += info->modrm_reg;
4134 
4135  if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0)
4136  break;
4137 
4138  intercept = svm->nested.intercept;
4139 
4140  if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
4141  break;
4142 
4143  cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
4144  val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
4145 
4146  if (info->intercept == x86_intercept_lmsw) {
4147  cr0 &= 0xfUL;
4148  val &= 0xfUL;
4149  /* lmsw can't clear PE - catch this here */
4150  if (cr0 & X86_CR0_PE)
4151  val |= X86_CR0_PE;
4152  }
4153 
4154  if (cr0 ^ val)
4155  icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
4156 
4157  break;
4158  }
4159  case SVM_EXIT_READ_DR0:
4160  case SVM_EXIT_WRITE_DR0:
4161  icpt_info.exit_code += info->modrm_reg;
4162  break;
4163  case SVM_EXIT_MSR:
4164  if (info->intercept == x86_intercept_wrmsr)
4165  vmcb->control.exit_info_1 = 1;
4166  else
4167  vmcb->control.exit_info_1 = 0;
4168  break;
4169  case SVM_EXIT_PAUSE:
4170  /*
4171  * The emulator raises this intercept for the NOP opcode; PAUSE
4172  * is encoded as REP NOP, so check for the rep prefix here.
4173  */
4174  if (info->rep_prefix != REPE_PREFIX)
4175  goto out;
 break; /* don't fall through into the IOIO handling below */
4176  case SVM_EXIT_IOIO: {
4177  u64 exit_info;
4178  u32 bytes;
4179 
4180  exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16;
4181 
4182  if (info->intercept == x86_intercept_in ||
4183  info->intercept == x86_intercept_ins) {
4184  exit_info |= SVM_IOIO_TYPE_MASK;
4185  bytes = info->src_bytes;
4186  } else {
4187  bytes = info->dst_bytes;
4188  }
4189 
4190  if (info->intercept == x86_intercept_outs ||
4191  info->intercept == x86_intercept_ins)
4192  exit_info |= SVM_IOIO_STR_MASK;
4193 
4194  if (info->rep_prefix)
4195  exit_info |= SVM_IOIO_REP_MASK;
4196 
4197  bytes = min(bytes, 4u);
4198 
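 /*
  * bytes is the operand size (1/2/4) and ad_bytes the address size
  * (2/4/8); shifting them by SVM_IOIO_SIZE_SHIFT and
  * (SVM_IOIO_ASIZE_SHIFT - 1) turns those byte counts directly into the
  * SZ8/SZ16/SZ32 and A16/A32/A64 flag bits of exit_info_1.
  */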
4199  exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
4200 
4201  exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
4202 
4203  vmcb->control.exit_info_1 = exit_info;
4204  vmcb->control.exit_info_2 = info->next_rip;
4205 
4206  break;
4207  }
4208  default:
4209  break;
4210  }
4211 
4212  vmcb->control.next_rip = info->next_rip;
4213  vmcb->control.exit_code = icpt_info.exit_code;
4214  vmexit = nested_svm_exit_handled(svm);
4215 
4216  ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
4217  : X86EMUL_CONTINUE;
4218 
4219 out:
4220  return ret;
4221 }
4222 
4223 static struct kvm_x86_ops svm_x86_ops = {
4224  .cpu_has_kvm_support = has_svm,
4225  .disabled_by_bios = is_disabled,
4226  .hardware_setup = svm_hardware_setup,
4227  .hardware_unsetup = svm_hardware_unsetup,
4228  .check_processor_compatibility = svm_check_processor_compat,
4229  .hardware_enable = svm_hardware_enable,
4230  .hardware_disable = svm_hardware_disable,
4231  .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
4232 
4233  .vcpu_create = svm_create_vcpu,
4234  .vcpu_free = svm_free_vcpu,
4235  .vcpu_reset = svm_vcpu_reset,
4236 
4237  .prepare_guest_switch = svm_prepare_guest_switch,
4238  .vcpu_load = svm_vcpu_load,
4239  .vcpu_put = svm_vcpu_put,
4240 
4241  .update_db_bp_intercept = update_db_bp_intercept,
4242  .get_msr = svm_get_msr,
4243  .set_msr = svm_set_msr,
4244  .get_segment_base = svm_get_segment_base,
4245  .get_segment = svm_get_segment,
4246  .set_segment = svm_set_segment,
4247  .get_cpl = svm_get_cpl,
4248  .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
4249  .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
4250  .decache_cr3 = svm_decache_cr3,
4251  .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
4252  .set_cr0 = svm_set_cr0,
4253  .set_cr3 = svm_set_cr3,
4254  .set_cr4 = svm_set_cr4,
4255  .set_efer = svm_set_efer,
4256  .get_idt = svm_get_idt,
4257  .set_idt = svm_set_idt,
4258  .get_gdt = svm_get_gdt,
4259  .set_gdt = svm_set_gdt,
4260  .set_dr7 = svm_set_dr7,
4261  .cache_reg = svm_cache_reg,
4262  .get_rflags = svm_get_rflags,
4263  .set_rflags = svm_set_rflags,
4264  .fpu_activate = svm_fpu_activate,
4265  .fpu_deactivate = svm_fpu_deactivate,
4266 
4267  .tlb_flush = svm_flush_tlb,
4268 
4269  .run = svm_vcpu_run,
4270  .handle_exit = handle_exit,
4271  .skip_emulated_instruction = skip_emulated_instruction,
4272  .set_interrupt_shadow = svm_set_interrupt_shadow,
4273  .get_interrupt_shadow = svm_get_interrupt_shadow,
4274  .patch_hypercall = svm_patch_hypercall,
4275  .set_irq = svm_set_irq,
4276  .set_nmi = svm_inject_nmi,
4277  .queue_exception = svm_queue_exception,
4278  .cancel_injection = svm_cancel_injection,
4279  .interrupt_allowed = svm_interrupt_allowed,
4280  .nmi_allowed = svm_nmi_allowed,
4281  .get_nmi_mask = svm_get_nmi_mask,
4282  .set_nmi_mask = svm_set_nmi_mask,
4283  .enable_nmi_window = enable_nmi_window,
4284  .enable_irq_window = enable_irq_window,
4285  .update_cr8_intercept = update_cr8_intercept,
4286 
4287  .set_tss_addr = svm_set_tss_addr,
4288  .get_tdp_level = get_npt_level,
4289  .get_mt_mask = svm_get_mt_mask,
4290 
4291  .get_exit_info = svm_get_exit_info,
4292 
4293  .get_lpage_level = svm_get_lpage_level,
4294 
4295  .cpuid_update = svm_cpuid_update,
4296 
4297  .rdtscp_supported = svm_rdtscp_supported,
4298  .invpcid_supported = svm_invpcid_supported,
4299 
4300  .set_supported_cpuid = svm_set_supported_cpuid,
4301 
4302  .has_wbinvd_exit = svm_has_wbinvd_exit,
4303 
4304  .set_tsc_khz = svm_set_tsc_khz,
4305  .write_tsc_offset = svm_write_tsc_offset,
4306  .adjust_tsc_offset = svm_adjust_tsc_offset,
4307  .compute_tsc_offset = svm_compute_tsc_offset,
4308  .read_l1_tsc = svm_read_l1_tsc,
4309 
4310  .set_tdp_cr3 = set_tdp_cr3,
4311 
4312  .check_intercept = svm_check_intercept,
4313 };
4314 
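 /*
  * kvm_init() registers svm_x86_ops as the hardware backend and tells the
  * generic KVM code how large (and how aligned) a struct vcpu_svm
  * allocation has to be.
  */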
4315 static int __init svm_init(void)
4316 {
4317  return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
4318  __alignof__(struct vcpu_svm), THIS_MODULE);
4319 }
4320 
4321 static void __exit svm_exit(void)
4322 {
4323  kvm_exit();
4324 }
4325 
4326 module_init(svm_init)
4327 module_exit(svm_exit)