24 #include <linux/module.h>
25 #include <linux/kernel.h>
28 #include <linux/sched.h>
32 #include <linux/slab.h>
44 #include <asm/perf_event.h>
48 #define __ex(x) __kvm_handle_fault_on_reboot(x)
49 #define __ex_clear(x, reg) \
50 ____kvm_handle_fault_on_reboot(x, "xor " reg " , " reg)
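/*
 * __ex() and __ex_clear() wrap VMX instructions so that a fault raised while
 * VMX is being torn down (e.g. on reboot) is handled gracefully instead of
 * crashing the host; __ex_clear() additionally zeroes the output register.
 */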
55 static const struct x86_cpu_id vmx_cpu_id[] = {
72 enable_unrestricted_guest, bool, S_IRUGO);
94 #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
95 (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
96 #define KVM_GUEST_CR0_MASK \
97 (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
98 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
99 (X86_CR0_WP | X86_CR0_NE)
100 #define KVM_VM_CR0_ALWAYS_ON \
101 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
102 #define KVM_CR4_GUEST_OWNED_BITS \
103 (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
104 | X86_CR4_OSXMMEXCPT)
106 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
107 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
109 #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
122 #define KVM_VMX_DEFAULT_PLE_GAP 128
123 #define KVM_VMX_DEFAULT_PLE_WINDOW 4096
132 #define NR_AUTOLOAD_MSRS 8
133 #define VMCS02_POOL_SIZE 1
326 #define VMCS12_REVISION 0x11e57ed0
333 #define VMCS12_SIZE 0x1000
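/*
 * VMCS12_REVISION is a KVM-defined identifier stored in the vmcs12 region an
 * L1 hypervisor hands to VMPTRLD; it only needs to differ from the physical
 * CPU's VMCS revision so the two formats cannot be confused.  VMCS12_SIZE
 * pads struct vmcs12 up to a full 4KB page.
 */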
382 u64 msr_host_kernel_gs_base;
383 u64 msr_guest_kernel_gs_base;
414 struct kvm_save_segment {
450 #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
451 #define FIELD(number, name) [number] = VMCS12_OFFSET(name)
452 #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \
453 [number##_HIGH] = VMCS12_OFFSET(name)+4
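/*
 * FIELD()/FIELD64() build vmcs_field_to_offset_table, mapping a VMCS field
 * encoding to the offset of the corresponding member of struct vmcs12.
 * 64-bit fields also get an entry for their _HIGH encoding, which points at
 * the upper 4 bytes of the same member.
 */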
455 static const unsigned short vmcs_field_to_offset_table[] = {
456 FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
457 FIELD(GUEST_ES_SELECTOR, guest_es_selector),
458 FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
459 FIELD(GUEST_SS_SELECTOR, guest_ss_selector),
460 FIELD(GUEST_DS_SELECTOR, guest_ds_selector),
461 FIELD(GUEST_FS_SELECTOR, guest_fs_selector),
462 FIELD(GUEST_GS_SELECTOR, guest_gs_selector),
463 FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
464 FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
465 FIELD(HOST_ES_SELECTOR, host_es_selector),
466 FIELD(HOST_CS_SELECTOR, host_cs_selector),
467 FIELD(HOST_SS_SELECTOR, host_ss_selector),
468 FIELD(HOST_DS_SELECTOR, host_ds_selector),
469 FIELD(HOST_FS_SELECTOR, host_fs_selector),
470 FIELD(HOST_GS_SELECTOR, host_gs_selector),
471 FIELD(HOST_TR_SELECTOR, host_tr_selector),
472 FIELD64(IO_BITMAP_A, io_bitmap_a),
473 FIELD64(IO_BITMAP_B, io_bitmap_b),
474 FIELD64(MSR_BITMAP, msr_bitmap),
475 FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr),
476 FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr),
477 FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
478 FIELD64(TSC_OFFSET, tsc_offset),
479 FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
480 FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
481 FIELD64(EPT_POINTER, ept_pointer),
482 FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
483 FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
484 FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
485 FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
486 FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
487 FIELD64(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl),
488 FIELD64(GUEST_PDPTR0, guest_pdptr0),
489 FIELD64(GUEST_PDPTR1, guest_pdptr1),
490 FIELD64(GUEST_PDPTR2, guest_pdptr2),
491 FIELD64(GUEST_PDPTR3, guest_pdptr3),
492 FIELD64(HOST_IA32_PAT, host_ia32_pat),
493 FIELD64(HOST_IA32_EFER, host_ia32_efer),
494 FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
495 FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control),
496 FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control),
497 FIELD(EXCEPTION_BITMAP, exception_bitmap),
498 FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask),
499 FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match),
500 FIELD(CR3_TARGET_COUNT, cr3_target_count),
501 FIELD(VM_EXIT_CONTROLS, vm_exit_controls),
502 FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count),
503 FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count),
504 FIELD(VM_ENTRY_CONTROLS, vm_entry_controls),
505 FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count),
506 FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field),
507 FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code),
508 FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len),
509 FIELD(TPR_THRESHOLD, tpr_threshold),
510 FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control),
511 FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error),
512 FIELD(VM_EXIT_REASON, vm_exit_reason),
513 FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info),
514 FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code),
515 FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field),
516 FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code),
517 FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len),
518 FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info),
519 FIELD(GUEST_ES_LIMIT, guest_es_limit),
520 FIELD(GUEST_CS_LIMIT, guest_cs_limit),
521 FIELD(GUEST_SS_LIMIT, guest_ss_limit),
522 FIELD(GUEST_DS_LIMIT, guest_ds_limit),
523 FIELD(GUEST_FS_LIMIT, guest_fs_limit),
524 FIELD(GUEST_GS_LIMIT, guest_gs_limit),
525 FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit),
526 FIELD(GUEST_TR_LIMIT, guest_tr_limit),
527 FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit),
528 FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit),
529 FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes),
530 FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes),
531 FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes),
532 FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes),
533 FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes),
534 FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes),
535 FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes),
536 FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes),
537 FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info),
538 FIELD(GUEST_ACTIVITY_STATE, guest_activity_state),
539 FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs),
540 FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs),
541 FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask),
542 FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask),
543 FIELD(CR0_READ_SHADOW, cr0_read_shadow),
544 FIELD(CR4_READ_SHADOW, cr4_read_shadow),
545 FIELD(CR3_TARGET_VALUE0, cr3_target_value0),
546 FIELD(CR3_TARGET_VALUE1, cr3_target_value1),
547 FIELD(CR3_TARGET_VALUE2, cr3_target_value2),
548 FIELD(CR3_TARGET_VALUE3, cr3_target_value3),
549 FIELD(EXIT_QUALIFICATION, exit_qualification),
550 FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address),
551 FIELD(GUEST_CR0, guest_cr0),
552 FIELD(GUEST_CR3, guest_cr3),
553 FIELD(GUEST_CR4, guest_cr4),
554 FIELD(GUEST_ES_BASE, guest_es_base),
555 FIELD(GUEST_CS_BASE, guest_cs_base),
556 FIELD(GUEST_SS_BASE, guest_ss_base),
557 FIELD(GUEST_DS_BASE, guest_ds_base),
558 FIELD(GUEST_FS_BASE, guest_fs_base),
559 FIELD(GUEST_GS_BASE, guest_gs_base),
560 FIELD(GUEST_LDTR_BASE, guest_ldtr_base),
561 FIELD(GUEST_TR_BASE, guest_tr_base),
562 FIELD(GUEST_GDTR_BASE, guest_gdtr_base),
563 FIELD(GUEST_IDTR_BASE, guest_idtr_base),
564 FIELD(GUEST_DR7, guest_dr7),
565 FIELD(GUEST_RSP, guest_rsp),
566 FIELD(GUEST_RIP, guest_rip),
567 FIELD(GUEST_RFLAGS, guest_rflags),
568 FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions),
569 FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp),
570 FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip),
571 FIELD(HOST_CR0, host_cr0),
572 FIELD(HOST_CR3, host_cr3),
573 FIELD(HOST_CR4, host_cr4),
576 FIELD(HOST_TR_BASE, host_tr_base),
577 FIELD(HOST_GDTR_BASE, host_gdtr_base),
578 FIELD(HOST_IDTR_BASE, host_idtr_base),
579 FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp),
580 FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip),
582 FIELD(HOST_RIP, host_rip),
584 static const int max_vmcs_field = ARRAY_SIZE(vmcs_field_to_offset_table);
586 static inline short vmcs_field_to_offset(unsigned long field)
588 if (field >= max_vmcs_field || vmcs_field_to_offset_table[field] == 0)
590 return vmcs_field_to_offset_table[field];
595 return to_vmx(vcpu)->nested.current_vmcs12;
601 if (is_error_page(page))
607 static void nested_release_page(struct page *page)
612 static void nested_release_page_clean(struct page *page)
617 static u64 construct_eptp(unsigned long root_hpa);
618 static void kvm_cpu_vmxon(u64 addr);
619 static void kvm_cpu_vmxoff(void);
620 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
621 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
622 static void vmx_set_segment(struct kvm_vcpu *vcpu,
624 static void vmx_get_segment(struct kvm_vcpu *vcpu,
636 static unsigned long *vmx_io_bitmap_a;
637 static unsigned long *vmx_io_bitmap_b;
638 static unsigned long *vmx_msr_bitmap_legacy;
639 static unsigned long *vmx_msr_bitmap_longmode;
641 static bool cpu_has_load_ia32_efer;
642 static bool cpu_has_load_perf_global_ctrl;
647 static struct vmcs_config {
651 u32 pin_based_exec_ctrl;
652 u32 cpu_based_exec_ctrl;
653 u32 cpu_based_2nd_exec_ctrl;
658 static struct vmx_capability {
663 #define VMX_SEGMENT_FIELD(seg) \
664 [VCPU_SREG_##seg] = { \
665 .selector = GUEST_##seg##_SELECTOR, \
666 .base = GUEST_##seg##_BASE, \
667 .limit = GUEST_##seg##_LIMIT, \
668 .ar_bytes = GUEST_##seg##_AR_BYTES, \
671 static const struct kvm_vmx_segment_field {
676 } kvm_vmx_segment_fields[] = {
687 static u64 host_efer;
689 static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
695 static const u32 vmx_msr_index[] = {
701 #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
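/*
 * The is_*() helpers below decode a VM-exit interruption-information field:
 * they check the valid bit, the interruption type and, where relevant, the
 * vector (#PF, #NM, #UD, #MC, external interrupt).
 */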
705 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
706 INTR_INFO_VALID_MASK)) ==
707 (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
712 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
713 INTR_INFO_VALID_MASK)) ==
714 (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
717 static inline bool is_invalid_opcode(u32 intr_info)
719 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
720 INTR_INFO_VALID_MASK)) ==
721 (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
724 static inline bool is_external_interrupt(u32 intr_info)
726 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
727 == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
730 static inline bool is_machine_check(u32 intr_info)
732 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
733 INTR_INFO_VALID_MASK)) ==
734 (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
737 static inline bool cpu_has_vmx_msr_bitmap(void)
739 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
742 static inline bool cpu_has_vmx_tpr_shadow(void)
744 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
747 static inline bool vm_need_tpr_shadow(struct kvm *kvm)
749 return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
752 static inline bool cpu_has_secondary_exec_ctrls(void)
754 return vmcs_config.cpu_based_exec_ctrl &
755 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
758 static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
760 return vmcs_config.cpu_based_2nd_exec_ctrl &
761 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
764 static inline bool cpu_has_vmx_flexpriority(void)
766 return cpu_has_vmx_tpr_shadow() &&
767 cpu_has_vmx_virtualize_apic_accesses();
770 static inline bool cpu_has_vmx_ept_execute_only(void)
772 return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
775 static inline bool cpu_has_vmx_eptp_uncacheable(void)
777 return vmx_capability.ept & VMX_EPTP_UC_BIT;
780 static inline bool cpu_has_vmx_eptp_writeback(void)
782 return vmx_capability.ept & VMX_EPTP_WB_BIT;
785 static inline bool cpu_has_vmx_ept_2m_page(void)
787 return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT;
790 static inline bool cpu_has_vmx_ept_1g_page(void)
792 return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT;
795 static inline bool cpu_has_vmx_ept_4levels(void)
797 return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
800 static inline bool cpu_has_vmx_ept_ad_bits(void)
802 return vmx_capability.ept & VMX_EPT_AD_BIT;
805 static inline bool cpu_has_vmx_invept_individual_addr(void)
807 return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
810 static inline bool cpu_has_vmx_invept_context(void)
812 return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
815 static inline bool cpu_has_vmx_invept_global(void)
817 return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
820 static inline bool cpu_has_vmx_invvpid_single(void)
822 return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
825 static inline bool cpu_has_vmx_invvpid_global(void)
827 return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
830 static inline bool cpu_has_vmx_ept(void)
832 return vmcs_config.cpu_based_2nd_exec_ctrl &
833 SECONDARY_EXEC_ENABLE_EPT;
836 static inline bool cpu_has_vmx_unrestricted_guest(void)
838 return vmcs_config.cpu_based_2nd_exec_ctrl &
839 SECONDARY_EXEC_UNRESTRICTED_GUEST;
842 static inline bool cpu_has_vmx_ple(void)
844 return vmcs_config.cpu_based_2nd_exec_ctrl &
845 SECONDARY_EXEC_PAUSE_LOOP_EXITING;
848 static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm)
850 return flexpriority_enabled && irqchip_in_kernel(kvm);
853 static inline bool cpu_has_vmx_vpid(void)
855 return vmcs_config.cpu_based_2nd_exec_ctrl &
856 SECONDARY_EXEC_ENABLE_VPID;
859 static inline bool cpu_has_vmx_rdtscp(void)
861 return vmcs_config.cpu_based_2nd_exec_ctrl &
862 SECONDARY_EXEC_RDTSCP;
865 static inline bool cpu_has_vmx_invpcid(void)
867 return vmcs_config.cpu_based_2nd_exec_ctrl &
868 SECONDARY_EXEC_ENABLE_INVPCID;
871 static inline bool cpu_has_virtual_nmis(void)
873 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
876 static inline bool cpu_has_vmx_wbinvd_exit(void)
878 return vmcs_config.cpu_based_2nd_exec_ctrl &
879 SECONDARY_EXEC_WBINVD_EXITING;
882 static inline bool report_flexpriority(void)
884 return flexpriority_enabled;
895 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
899 static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12,
905 static inline bool is_exception(u32 intr_info)
907 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
908 == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
911 static void nested_vmx_vmexit(struct kvm_vcpu *vcpu);
912 static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
913 struct vmcs12 *vmcs12,
914 u32 reason, unsigned long qualification);
916 static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
920 for (i = 0; i < vmx->nmsrs; ++i)
921 if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
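/*
 * __invvpid()/__invept() issue the INVVPID/INVEPT instructions with a
 * descriptor built on the stack; a "ja 1f ; ud2" sequence turns a failed
 * invalidation (CF or ZF set) into an immediate, loud failure.
 */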
926 static inline void __invvpid(int ext, u16 vpid, gva_t gva)
934 asm volatile (__ex(ASM_VMX_INVVPID)
937 : : "a"(&operand), "c"(ext) : "cc", "memory");
940 static inline void __invept(int ext, u64 eptp, gpa_t gpa)
946 asm volatile (__ex(ASM_VMX_INVEPT)
948 "; ja 1f ; ud2 ; 1:\n"
949 : : "a" (&operand), "c" (ext) : "cc", "memory");
956 i = __find_msr_index(vmx, msr);
962 static void vmcs_clear(struct vmcs *vmcs)
967 asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
977 vmcs_clear(loaded_vmcs->vmcs);
978 loaded_vmcs->cpu = -1;
982 static void vmcs_load(struct vmcs *vmcs)
987 asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
995 static void __loaded_vmcs_clear(void *arg)
1000 if (loaded_vmcs->cpu != cpu)
1002 if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
1005 loaded_vmcs_init(loaded_vmcs);
1008 static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
1010 if (loaded_vmcs->cpu != -1)
1012 loaded_vmcs->cpu, __loaded_vmcs_clear, loaded_vmcs, 1);
1015 static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
1020 if (cpu_has_vmx_invvpid_single())
1021 __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
1024 static inline void vpid_sync_vcpu_global(void)
1026 if (cpu_has_vmx_invvpid_global())
1027 __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
1030 static inline void vpid_sync_context(struct vcpu_vmx *vmx)
1032 if (cpu_has_vmx_invvpid_single())
1033 vpid_sync_vcpu_single(vmx);
1035 vpid_sync_vcpu_global();
1038 static inline void ept_sync_global(void)
1040 if (cpu_has_vmx_invept_global())
1041 __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
1044 static inline void ept_sync_context(u64 eptp)
1047 if (cpu_has_vmx_invept_context())
1048 __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
1054 static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
1057 if (cpu_has_vmx_invept_individual_addr())
1058 __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
1061 ept_sync_context(eptp);
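/*
 * vmcs_readl()/vmcs_writel() and the sized wrappers below are the accessors
 * for the current VMCS; they emit VMREAD/VMWRITE directly and report
 * failures via vmwrite_error().  On 32-bit hosts a 64-bit field is accessed
 * as two adjacent 32-bit halves (field and field+1).
 */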
1067 unsigned long value;
1069 asm volatile (__ex_clear(ASM_VMX_VMREAD_RDX_RAX, "%0")
1070 : "=a"(value) : "d"(field) : "cc");
1076 return vmcs_readl(field);
1081 return vmcs_readl(field);
1086 #ifdef CONFIG_X86_64
1087 return vmcs_readl(field);
1089 return vmcs_readl(field) | ((u64)vmcs_readl(field+1) << 32);
1093 static noinline void vmwrite_error(unsigned long field, unsigned long value)
1096 field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
1100 static void vmcs_writel(unsigned long field, unsigned long value)
1104 asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0"
1105 : "=q"(error) : "a"(value), "d"(field) : "cc");
1107 vmwrite_error(field, value);
1110 static void vmcs_write16(unsigned long field, u16 value)
1112 vmcs_writel(field, value);
1115 static void vmcs_write32(unsigned long field, u32 value)
1117 vmcs_writel(field, value);
1120 static void vmcs_write64(unsigned long field, u64 value)
1122 vmcs_writel(field, value);
1123 #ifndef CONFIG_X86_64
1125 vmcs_writel(field+1, value >> 32);
1129 static void vmcs_clear_bits(unsigned long field, u32 mask)
1131 vmcs_writel(field, vmcs_readl(field) & ~mask);
1134 static void vmcs_set_bits(unsigned long field, u32 mask)
1136 vmcs_writel(field, vmcs_readl(field) | mask);
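/*
 * The segment cache avoids redundant VMREADs: each guest segment's selector,
 * base, limit and access rights are read from the VMCS once and then served
 * from vmx->segment_cache until the cache is cleared.
 */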
1139 static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
1144 static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
1159 static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
1164 *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
1168 static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
1173 *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
1177 static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
1182 *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
1186 static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
1190 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
1191 *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
1195 static void update_exception_bitmap(struct kvm_vcpu *vcpu)
1205 if (to_vmx(vcpu)->rmode.vm86_active)
1217 if (is_guest_mode(vcpu))
1218 eb |= get_vmcs12(vcpu)->exception_bitmap;
1220 vmcs_write32(EXCEPTION_BITMAP, eb);
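/*
 * The "atomic switch" helpers manage the MSR load/store lists that the CPU
 * processes on VM entry and VM exit (up to NR_AUTOLOAD_MSRS entries).  EFER
 * and IA32_PERF_GLOBAL_CTRL use dedicated VM-entry/VM-exit controls instead
 * of a list slot when the hardware supports them.
 */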
1223 static void clear_atomic_switch_msr_special(unsigned long entry,
1226 vmcs_clear_bits(VM_ENTRY_CONTROLS, entry);
1227 vmcs_clear_bits(VM_EXIT_CONTROLS, exit);
1230 static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
1237 if (cpu_has_load_ia32_efer) {
1238 clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER,
1239 VM_EXIT_LOAD_IA32_EFER);
1244 if (cpu_has_load_perf_global_ctrl) {
1245 clear_atomic_switch_msr_special(
1246 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1247 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
1253 for (i = 0; i < m->nr; ++i)
1254 if (m->guest[i].index == msr)
1260 m->guest[i] = m->guest[m->nr];
1261 m->host[i] = m->host[m->nr];
1262 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
1263 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
1266 static void add_atomic_switch_msr_special(unsigned long entry,
1267 unsigned long exit, unsigned long guest_val_vmcs,
1268 unsigned long host_val_vmcs, u64 guest_val, u64 host_val)
1270 vmcs_write64(guest_val_vmcs, guest_val);
1271 vmcs_write64(host_val_vmcs, host_val);
1272 vmcs_set_bits(VM_ENTRY_CONTROLS, entry);
1273 vmcs_set_bits(VM_EXIT_CONTROLS, exit);
1276 static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
1277 u64 guest_val, u64 host_val)
1284 if (cpu_has_load_ia32_efer) {
1285 add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER,
1286 VM_EXIT_LOAD_IA32_EFER,
1289 guest_val, host_val);
1294 if (cpu_has_load_perf_global_ctrl) {
1295 add_atomic_switch_msr_special(
1296 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1297 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
1298 GUEST_IA32_PERF_GLOBAL_CTRL,
1299 HOST_IA32_PERF_GLOBAL_CTRL,
1300 guest_val, host_val);
1306 for (i = 0; i < m->nr; ++i)
1307 if (m->guest[i].index == msr)
1312 "Can't add msr %x\n", msr);
1314 } else if (i == m->nr) {
1316 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
1317 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
1320 m->guest[i].index = msr;
1321 m->guest[i].value = guest_val;
1322 m->host[i].index = msr;
1323 m->host[i].value = host_val;
1326 static void reload_tss(void)
1339 static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
1344 guest_efer = vmx->vcpu.arch.efer;
1351 #ifdef CONFIG_X86_64
1357 guest_efer &= ~ignore_bits;
1358 guest_efer |= host_efer & ignore_bits;
1359 vmx->guest_msrs[efer_offset].data = guest_efer;
1360 vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
1362 clear_atomic_switch_msr(vmx, MSR_EFER);
1364 if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) {
1365 guest_efer = vmx->vcpu.arch.efer;
1366 if (!(guest_efer & EFER_LMA))
1368 add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer);
1379 unsigned long table_base;
1382 if (!(selector & ~3))
1388 u16 ldt_selector = kvm_read_ldt();
1390 if (!(ldt_selector & ~3))
1393 table_base = segment_base(ldt_selector);
1395 d = (struct desc_struct *)(table_base + (selector & ~7));
1396 v = get_desc_base(d);
1397 #ifdef CONFIG_X86_64
1398 if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
1404 static inline unsigned long kvm_read_tr_base(void)
1407 asm("str %0" : "=g"(tr));
1408 return segment_base(tr);
1411 static void vmx_save_host_state(struct kvm_vcpu *vcpu)
1413 struct vcpu_vmx *vmx = to_vmx(vcpu);
1428 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
1431 vmcs_write16(HOST_FS_SELECTOR, 0);
1436 vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
1438 vmcs_write16(HOST_GS_SELECTOR, 0);
1442 #ifdef CONFIG_X86_64
1447 #ifdef CONFIG_X86_64
1455 #ifdef CONFIG_X86_64
1457 if (is_long_mode(&vmx->vcpu))
1466 static void __vmx_load_host_state(struct vcpu_vmx *vmx)
1471 ++vmx->vcpu.stat.host_state_reload;
1473 #ifdef CONFIG_X86_64
1474 if (is_long_mode(&vmx->vcpu))
1479 #ifdef CONFIG_X86_64
1487 #ifdef CONFIG_X86_64
1494 #ifdef CONFIG_X86_64
1501 if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded)
1506 static void vmx_load_host_state(struct vcpu_vmx *vmx)
1509 __vmx_load_host_state(vmx);
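/*
 * vmx_vcpu_load() makes this vcpu's VMCS current on the target CPU
 * (VMPTRLD), links it into the per-cpu loaded_vmcss_on_cpu list and
 * refreshes the host state that differs per CPU (TR base, GDT base,
 * SYSENTER_ESP).  vmx_vcpu_put() gives the borrowed host state back.
 */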
1517 static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1519 struct vcpu_vmx *vmx = to_vmx(vcpu);
1523 kvm_cpu_vmxon(phys_addr);
1534 unsigned long sysenter_esp;
1538 list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
1539 &per_cpu(loaded_vmcss_on_cpu, cpu));
1546 vmcs_writel(HOST_TR_BASE, kvm_read_tr_base());
1547 vmcs_writel(HOST_GDTR_BASE, gdt->address);
1550 vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp);
1555 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
1557 __vmx_load_host_state(to_vmx(vcpu));
1558 if (!vmm_exclusive) {
1559 __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
1565 static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
1572 cr0 = vmcs_readl(GUEST_CR0);
1575 vmcs_writel(GUEST_CR0, cr0);
1576 update_exception_bitmap(vcpu);
1578 if (is_guest_mode(vcpu))
1579 vcpu->arch.cr0_guest_owned_bits &=
1580 ~get_vmcs12(vcpu)->cr0_guest_host_mask;
1581 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
1584 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
1591 static inline unsigned long nested_read_cr0(struct vmcs12 *fields)
1596 static inline unsigned long nested_read_cr4(struct vmcs12 *fields)
1602 static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
1607 vmx_decache_cr0_guest_bits(vcpu);
1609 update_exception_bitmap(vcpu);
1610 vcpu->arch.cr0_guest_owned_bits = 0;
1611 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
1612 if (is_guest_mode(vcpu)) {
1621 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1624 vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
1626 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
1629 static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
1631 unsigned long rflags, save_rflags;
1635 rflags = vmcs_readl(GUEST_RFLAGS);
1636 if (to_vmx(vcpu)->rmode.vm86_active) {
1638 save_rflags = to_vmx(vcpu)->rmode.save_rflags;
1641 to_vmx(vcpu)->rflags = rflags;
1643 return to_vmx(vcpu)->rflags;
1646 static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1650 to_vmx(vcpu)->rflags = rflags;
1651 if (to_vmx(vcpu)->rmode.vm86_active) {
1652 to_vmx(vcpu)->rmode.save_rflags = rflags;
1655 vmcs_writel(GUEST_RFLAGS, rflags);
1658 static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
1660 u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
1663 if (interruptibility & GUEST_INTR_STATE_STI)
1665 if (interruptibility & GUEST_INTR_STATE_MOV_SS)
1671 static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
1673 u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
1674 u32 interruptibility = interruptibility_old;
1676 interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
1679 interruptibility |= GUEST_INTR_STATE_MOV_SS;
1681 interruptibility |= GUEST_INTR_STATE_STI;
1683 if ((interruptibility != interruptibility_old))
1684 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
1687 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
1691 rip = kvm_rip_read(vcpu);
1692 rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
1693 kvm_rip_write(vcpu, rip);
1696 vmx_set_interrupt_shadow(vcpu, 0);
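/*
 * Exception injection: vmx_queue_exception() builds the VM-entry
 * interruption-information field.  A #PF that the L1 hypervisor asked to
 * intercept is reflected as a nested VM exit via nested_pf_handled()
 * instead of being injected into L2.
 */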
1706 static int nested_pf_handled(struct kvm_vcpu *vcpu)
1708 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1714 nested_vmx_vmexit(vcpu);
1718 static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
1722 struct vcpu_vmx *vmx = to_vmx(vcpu);
1723 u32 intr_info = nr | INTR_INFO_VALID_MASK;
1725 if (nr == PF_VECTOR && is_guest_mode(vcpu) &&
1726 nested_pf_handled(vcpu))
1729 if (has_error_code) {
1730 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
1731 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
1734 if (vmx->rmode.vm86_active) {
1736 if (kvm_exception_is_soft(nr))
1737 inc_eip = vcpu->arch.event_exit_inst_len;
1743 if (kvm_exception_is_soft(nr)) {
1744 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
1745 vmx->vcpu.arch.event_exit_inst_len);
1746 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
1748 intr_info |= INTR_TYPE_HARD_EXCEPTION;
1750 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
1753 static bool vmx_rdtscp_supported(void)
1755 return cpu_has_vmx_rdtscp();
1758 static bool vmx_invpcid_supported(void)
1760 return cpu_has_vmx_invpcid() && enable_ept;
1766 static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
1780 static void setup_msrs(struct vcpu_vmx *vmx)
1782 int save_nmsrs, index;
1783 unsigned long *msr_bitmap;
1786 #ifdef CONFIG_X86_64
1787 if (is_long_mode(&vmx->vcpu)) {
1790 move_msr_up(vmx, index, save_nmsrs++);
1791 index = __find_msr_index(vmx, MSR_LSTAR);
1793 move_msr_up(vmx, index, save_nmsrs++);
1794 index = __find_msr_index(vmx, MSR_CSTAR);
1796 move_msr_up(vmx, index, save_nmsrs++);
1799 move_msr_up(vmx, index, save_nmsrs++);
1804 index = __find_msr_index(vmx, MSR_STAR);
1806 move_msr_up(vmx, index, save_nmsrs++);
1809 index = __find_msr_index(vmx, MSR_EFER);
1810 if (index >= 0 && update_transition_efer(vmx, index))
1811 move_msr_up(vmx, index, save_nmsrs++);
1815 if (cpu_has_vmx_msr_bitmap()) {
1816 if (is_long_mode(&vmx->vcpu))
1817 msr_bitmap = vmx_msr_bitmap_longmode;
1819 msr_bitmap = vmx_msr_bitmap_legacy;
1821 vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
1829 static u64 guest_read_tsc(void)
1831 u64 host_tsc, tsc_offset;
1834 tsc_offset = vmcs_read64(TSC_OFFSET);
1835 return host_tsc + tsc_offset;
1844 u64 host_tsc, tsc_offset;
1847 tsc_offset = is_guest_mode(vcpu) ?
1848 to_vmx(vcpu)->nested.vmcs01_tsc_offset :
1849 vmcs_read64(TSC_OFFSET);
1850 return host_tsc + tsc_offset;
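/*
 * TSC handling: the guest-visible TSC is the host TSC plus the VMCS
 * TSC_OFFSET.  While a nested (L2) guest is running, vmcs01_tsc_offset
 * remembers the L1 offset so that reads and adjustments stay consistent
 * with what L1 expects.
 */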
1857 static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
1863 vcpu->arch.tsc_catchup = 1;
1864 vcpu->arch.tsc_always_catchup = 1;
1866 WARN(1, "user requested TSC rate below hardware speed\n");
1874 if (is_guest_mode(vcpu)) {
1881 struct vmcs12 *vmcs12;
1882 to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset;
1884 vmcs12 = get_vmcs12(vcpu);
1885 vmcs_write64(TSC_OFFSET, offset +
1886 (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
1889 vmcs_write64(TSC_OFFSET, offset);
1893 static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
1895 u64 offset = vmcs_read64(TSC_OFFSET);
1896 vmcs_write64(TSC_OFFSET, offset + adjustment);
1897 if (is_guest_mode(vcpu)) {
1899 to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment;
1903 static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
1908 static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
1920 static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
1922 return nested && guest_cpuid_has_vmx(vcpu);
1937 static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high;
1938 static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high;
1939 static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
1940 static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
1941 static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
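/*
 * These values describe the VMX controls KVM exposes to an L1 hypervisor:
 * the low word holds bits that must be 1, the high word bits that may be 1.
 * They are derived once from the real hardware MSRs and then filtered down
 * to what nested VMX actually emulates.
 */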
1942 static __init void nested_vmx_setup_ctls_msrs(void)
1964 nested_vmx_pinbased_ctls_low = 0x16;
1965 nested_vmx_pinbased_ctls_high = 0x16 |
1966 PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
1967 PIN_BASED_VIRTUAL_NMIS;
1970 nested_vmx_exit_ctls_low = 0;
1972 #ifdef CONFIG_X86_64
1973 nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE;
1975 nested_vmx_exit_ctls_high = 0;
1980 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
1981 nested_vmx_entry_ctls_low = 0;
1982 nested_vmx_entry_ctls_high &=
1983 VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
1987 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
1988 nested_vmx_procbased_ctls_low = 0;
1989 nested_vmx_procbased_ctls_high &=
1990 CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_USE_TSC_OFFSETING |
1991 CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
1992 CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
1993 CPU_BASED_CR3_STORE_EXITING |
1994 #ifdef CONFIG_X86_64
1995 CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
1997 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
1998 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
1999 CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
2000 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
2007 nested_vmx_procbased_ctls_high |= CPU_BASED_USE_MSR_BITMAPS;
2011 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high);
2012 nested_vmx_secondary_ctls_low = 0;
2013 nested_vmx_secondary_ctls_high &=
2014 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
2022 return ((control & high) | low) == control;
2025 static inline u64 vmx_control_msr(u32 low, u32 high)
2027 return low | ((u64)high << 32);
2049 switch (msr_index) {
2066 *pdata = vmx_control_msr(nested_vmx_pinbased_ctls_low,
2067 nested_vmx_pinbased_ctls_high);
2071 *pdata = vmx_control_msr(nested_vmx_procbased_ctls_low,
2072 nested_vmx_procbased_ctls_high);
2076 *pdata = vmx_control_msr(nested_vmx_exit_ctls_low,
2077 nested_vmx_exit_ctls_high);
2081 *pdata = vmx_control_msr(nested_vmx_entry_ctls_low,
2082 nested_vmx_entry_ctls_high);
2092 #define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
2093 #define VMXON_CR4_ALWAYSON X86_CR4_VMXE
2110 *pdata = vmx_control_msr(nested_vmx_secondary_ctls_low,
2111 nested_vmx_secondary_ctls_high);
2126 if (!nested_vmx_allowed(vcpu))
2144 static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2154 switch (msr_index) {
2155 #ifdef CONFIG_X86_64
2157 data = vmcs_readl(GUEST_FS_BASE);
2160 data = vmcs_readl(GUEST_GS_BASE);
2163 vmx_load_host_state(to_vmx(vcpu));
2164 data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
2170 data = guest_read_tsc();
2173 data = vmcs_read32(GUEST_SYSENTER_CS);
2176 data = vmcs_readl(GUEST_SYSENTER_EIP);
2179 data = vmcs_readl(GUEST_SYSENTER_ESP);
2182 if (!to_vmx(vcpu)->rdtscp_enabled)
2186 if (vmx_get_vmx_msr(vcpu, msr_index, pdata))
2188 msr = find_msr_entry(to_vmx(vcpu), msr_index);
2205 static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
2207 struct vcpu_vmx *vmx = to_vmx(vcpu);
2211 switch (msr_index) {
2215 #ifdef CONFIG_X86_64
2217 vmx_segment_cache_clear(vmx);
2218 vmcs_writel(GUEST_FS_BASE, data);
2221 vmx_segment_cache_clear(vmx);
2222 vmcs_writel(GUEST_GS_BASE, data);
2225 vmx_load_host_state(vmx);
2226 vmx->msr_guest_kernel_gs_base = data;
2230 vmcs_write32(GUEST_SYSENTER_CS, data);
2233 vmcs_writel(GUEST_SYSENTER_EIP, data);
2236 vmcs_writel(GUEST_SYSENTER_ESP, data);
2242 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
2243 vmcs_write64(GUEST_IA32_PAT, data);
2253 if ((data >> 32) != 0)
2257 if (vmx_set_vmx_msr(vcpu, msr_index, data))
2259 msr = find_msr_entry(vmx, msr_index);
2288 ept_save_pdptrs(vcpu);
2295 static __init int cpu_has_kvm_support(void)
2297 return cpu_has_vmx();
2300 static __init int vmx_disabled_by_bios(void)
2312 && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
2315 "activate TXT before enabling KVM\n");
2319 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
2327 static void kvm_cpu_vmxon(u64 addr)
2329 asm volatile (ASM_VMX_VMXON_RAX
2330 : : "a"(&addr), "m"(addr)
2334 static int hardware_enable(void *garbage)
2343 INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
2351 if ((old & test_bits) != test_bits) {
2357 if (vmm_exclusive) {
2358 kvm_cpu_vmxon(phys_addr);
2367 static void vmclear_local_loaded_vmcss(void)
2370 struct loaded_vmcs *v, *n;
2374 __loaded_vmcs_clear(v);
2381 static void kvm_cpu_vmxoff(void)
2383 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
2386 static void hardware_disable(void *garbage)
2388 if (vmm_exclusive) {
2389 vmclear_local_loaded_vmcss();
2395 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
2398 u32 vmx_msr_low, vmx_msr_high;
2399 u32 ctl = ctl_min | ctl_opt;
2401 rdmsr(msr, vmx_msr_low, vmx_msr_high);
2403 ctl &= vmx_msr_high;
2416 u32 vmx_msr_low, vmx_msr_high;
2418 rdmsr(msr, vmx_msr_low, vmx_msr_high);
2419 return vmx_msr_high & ctl;
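/*
 * adjust_vmx_controls() applies the allowed-0/allowed-1 semantics of the
 * IA32_VMX_*_CTLS MSRs: optional bits the CPU cannot set are dropped, and
 * the function fails if a required (min) bit is not supported.
 */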
2422 static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2424 u32 vmx_msr_low, vmx_msr_high;
2426 u32 _pin_based_exec_control = 0;
2427 u32 _cpu_based_exec_control = 0;
2428 u32 _cpu_based_2nd_exec_control = 0;
2429 u32 _vmexit_control = 0;
2430 u32 _vmentry_control = 0;
2432 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
2433 opt = PIN_BASED_VIRTUAL_NMIS;
2435 &_pin_based_exec_control) < 0)
2438 min = CPU_BASED_HLT_EXITING |
2439 #ifdef CONFIG_X86_64
2440 CPU_BASED_CR8_LOAD_EXITING |
2441 CPU_BASED_CR8_STORE_EXITING |
2443 CPU_BASED_CR3_LOAD_EXITING |
2444 CPU_BASED_CR3_STORE_EXITING |
2445 CPU_BASED_USE_IO_BITMAPS |
2446 CPU_BASED_MOV_DR_EXITING |
2447 CPU_BASED_USE_TSC_OFFSETING |
2448 CPU_BASED_MWAIT_EXITING |
2449 CPU_BASED_MONITOR_EXITING |
2450 CPU_BASED_INVLPG_EXITING |
2451 CPU_BASED_RDPMC_EXITING;
2453 opt = CPU_BASED_TPR_SHADOW |
2454 CPU_BASED_USE_MSR_BITMAPS |
2455 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
2457 &_cpu_based_exec_control) < 0)
2459 #ifdef CONFIG_X86_64
2460 if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
2461 _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
2462 ~CPU_BASED_CR8_STORE_EXITING;
2464 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
2466 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2467 SECONDARY_EXEC_WBINVD_EXITING |
2468 SECONDARY_EXEC_ENABLE_VPID |
2469 SECONDARY_EXEC_ENABLE_EPT |
2470 SECONDARY_EXEC_UNRESTRICTED_GUEST |
2471 SECONDARY_EXEC_PAUSE_LOOP_EXITING |
2472 SECONDARY_EXEC_RDTSCP |
2473 SECONDARY_EXEC_ENABLE_INVPCID;
2474 if (adjust_vmx_controls(min2, opt2,
2476 &_cpu_based_2nd_exec_control) < 0)
2479 #ifndef CONFIG_X86_64
2480 if (!(_cpu_based_2nd_exec_control &
2481 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
2482 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
2484 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
2487 _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
2488 CPU_BASED_CR3_STORE_EXITING |
2489 CPU_BASED_INVLPG_EXITING);
2491 vmx_capability.ept, vmx_capability.vpid);
2495 #ifdef CONFIG_X86_64
2496 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
2498 opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
2500 &_vmexit_control) < 0)
2504 opt = VM_ENTRY_LOAD_IA32_PAT;
2506 &_vmentry_control) < 0)
2512 if ((vmx_msr_high & 0x1fff) > PAGE_SIZE)
2515 #ifdef CONFIG_X86_64
2517 if (vmx_msr_high & (1u<<16))
2522 if (((vmx_msr_high >> 18) & 15) != 6)
2525 vmcs_conf->size = vmx_msr_high & 0x1fff;
2526 vmcs_conf->order = get_order(vmcs_config.size);
2527 vmcs_conf->revision_id = vmx_msr_low;
2529 vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
2530 vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
2531 vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
2532 vmcs_conf->vmexit_ctrl = _vmexit_control;
2533 vmcs_conf->vmentry_ctrl = _vmentry_control;
2535 cpu_has_load_ia32_efer =
2537 VM_ENTRY_LOAD_IA32_EFER)
2539 VM_EXIT_LOAD_IA32_EFER);
2541 cpu_has_load_perf_global_ctrl =
2543 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
2545 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
2561 if (cpu_has_load_perf_global_ctrl && boot_cpu_data.x86 == 0x6) {
2568 cpu_has_load_perf_global_ctrl = false;
2570 "does not work properly. Using workaround\n");
2580 static struct vmcs *alloc_vmcs_cpu(int cpu)
2586 pages = alloc_pages_exact_node(node, GFP_KERNEL, vmcs_config.order);
2590 memset(vmcs, 0, vmcs_config.size);
2595 static struct vmcs *alloc_vmcs(void)
2600 static void free_vmcs(struct vmcs *vmcs)
2602 free_pages((unsigned long)vmcs, vmcs_config.order);
2608 static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
2610 if (!loaded_vmcs->vmcs)
2612 loaded_vmcs_clear(loaded_vmcs);
2613 free_vmcs(loaded_vmcs->vmcs);
2617 static void free_kvm_area(void)
2622 free_vmcs(per_cpu(vmxarea, cpu));
2627 static __init int alloc_kvm_area(void)
2634 vmcs = alloc_vmcs_cpu(cpu);
2645 static __init int hardware_setup(void)
2647 if (setup_vmcs_config(&vmcs_config) < 0)
2653 if (!cpu_has_vmx_vpid())
2656 if (!cpu_has_vmx_ept() ||
2657 !cpu_has_vmx_ept_4levels()) {
2659 enable_unrestricted_guest = 0;
2660 enable_ept_ad_bits = 0;
2663 if (!cpu_has_vmx_ept_ad_bits())
2664 enable_ept_ad_bits = 0;
2666 if (!cpu_has_vmx_unrestricted_guest())
2667 enable_unrestricted_guest = 0;
2669 if (!cpu_has_vmx_flexpriority())
2670 flexpriority_enabled = 0;
2672 if (!cpu_has_vmx_tpr_shadow())
2675 if (enable_ept && !cpu_has_vmx_ept_2m_page())
2678 if (!cpu_has_vmx_ple())
2682 nested_vmx_setup_ctls_msrs();
2684 return alloc_kvm_area();
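/*
 * hardware_setup() above downgrades module options (ept, ept_ad_bits, vpid,
 * unrestricted_guest, flexpriority, ple, ...) that the CPU's VMX
 * capabilities cannot back, then allocates the per-cpu VMCS regions.
 */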
2687 static __exit void hardware_unsetup(void)
2694 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2697 if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) {
2698 tmp.base = vmcs_readl(sf->base);
2699 tmp.selector = vmcs_read16(sf->selector);
2702 vmx_set_segment(vcpu, &tmp, seg);
2705 static void enter_pmode(struct kvm_vcpu *vcpu)
2707 unsigned long flags;
2708 struct vcpu_vmx *vmx = to_vmx(vcpu);
2711 vmx->rmode.vm86_active = 0;
2713 vmx_segment_cache_clear(vmx);
2717 flags = vmcs_readl(GUEST_RFLAGS);
2720 vmcs_writel(GUEST_RFLAGS, flags);
2722 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
2725 update_exception_bitmap(vcpu);
2727 if (emulate_invalid_guest_state)
2735 vmx_segment_cache_clear(vmx);
2737 vmcs_write16(GUEST_SS_SELECTOR, 0);
2738 vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
2740 vmcs_write16(GUEST_CS_SELECTOR,
2742 vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
2745 static gva_t rmode_tss_base(struct kvm *kvm)
2747 if (!kvm->arch.tss_addr) {
2753 slot = id_to_memslot(slots, 0);
2758 return kvm->arch.tss_addr;
2761 static void fix_rmode_seg(int seg, struct kvm_segment *save)
2763 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2765 vmcs_write16(sf->selector, save->base >> 4);
2766 vmcs_write32(sf->base, save->base & 0xffff0);
2767 vmcs_write32(sf->limit, 0xffff);
2768 vmcs_write32(sf->ar_bytes, 0xf3);
2769 if (save->base & 0xf)
2771 " aligned when entering protected mode (seg=%d)",
2775 static void enter_rmode(struct kvm_vcpu *vcpu)
2777 unsigned long flags;
2778 struct vcpu_vmx *vmx = to_vmx(vcpu);
2781 if (enable_unrestricted_guest)
2791 vmx->rmode.vm86_active = 1;
2798 if (!vcpu->kvm->arch.tss_addr) {
2800 "called before entering vcpu\n");
2801 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2802 vmx_set_tss_addr(vcpu->kvm, 0xfeffd000);
2803 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2806 vmx_segment_cache_clear(vmx);
2808 vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
2810 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
2812 flags = vmcs_readl(GUEST_RFLAGS);
2817 vmcs_writel(GUEST_RFLAGS, flags);
2818 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
2819 update_exception_bitmap(vcpu);
2821 if (emulate_invalid_guest_state)
2822 goto continue_rmode;
2848 struct vcpu_vmx *vmx = to_vmx(vcpu);
2858 vmx_load_host_state(to_vmx(vcpu));
2860 if (efer & EFER_LMA) {
2861 vmcs_write32(VM_ENTRY_CONTROLS,
2862 vmcs_read32(VM_ENTRY_CONTROLS) |
2863 VM_ENTRY_IA32E_MODE);
2866 vmcs_write32(VM_ENTRY_CONTROLS,
2867 vmcs_read32(VM_ENTRY_CONTROLS) &
2868 ~VM_ENTRY_IA32E_MODE);
2875 #ifdef CONFIG_X86_64
2877 static void enter_lmode(struct kvm_vcpu *vcpu)
2881 vmx_segment_cache_clear(to_vmx(vcpu));
2883 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
2884 if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
2887 vmcs_write32(GUEST_TR_AR_BYTES,
2888 (guest_tr_ar & ~AR_TYPE_MASK)
2889 | AR_TYPE_BUSY_64_TSS);
2891 vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
2894 static void exit_lmode(struct kvm_vcpu *vcpu)
2896 vmcs_write32(VM_ENTRY_CONTROLS,
2897 vmcs_read32(VM_ENTRY_CONTROLS)
2898 & ~VM_ENTRY_IA32E_MODE);
2899 vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
2904 static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
2906 vpid_sync_context(to_vmx(vcpu));
2910 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
2914 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
2916 ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
2918 vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
2919 vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
2922 static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
2924 if (enable_ept && is_paging(vcpu))
2925 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
2929 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
2931 ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
2933 vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
2934 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
2937 static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
2940 (unsigned long *)&vcpu->arch.regs_dirty))
2943 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
2944 vmcs_write64(GUEST_PDPTR0, vcpu->arch.mmu.pdptrs[0]);
2945 vmcs_write64(GUEST_PDPTR1, vcpu->arch.mmu.pdptrs[1]);
2946 vmcs_write64(GUEST_PDPTR2, vcpu->arch.mmu.pdptrs[2]);
2947 vmcs_write64(GUEST_PDPTR3, vcpu->arch.mmu.pdptrs[3]);
2951 static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
2953 if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
2954 vcpu->arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
2955 vcpu->arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
2956 vcpu->arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
2957 vcpu->arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
2961 (unsigned long *)&vcpu->arch.regs_avail);
2963 (unsigned long *)&vcpu->arch.regs_dirty);
2966 static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
2968 static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
2973 vmx_decache_cr3(vcpu);
2976 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
2977 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
2978 (CPU_BASED_CR3_LOAD_EXITING |
2979 CPU_BASED_CR3_STORE_EXITING));
2981 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
2982 } else if (!is_paging(vcpu)) {
2984 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
2985 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
2986 ~(CPU_BASED_CR3_LOAD_EXITING |
2987 CPU_BASED_CR3_STORE_EXITING));
2989 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
2993 *hw_cr0 &= ~X86_CR0_WP;
2996 static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
2998 struct vcpu_vmx *vmx = to_vmx(vcpu);
2999 unsigned long hw_cr0;
3001 if (enable_unrestricted_guest)
3010 if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
3013 #ifdef CONFIG_X86_64
3017 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
3023 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
3028 vmcs_writel(CR0_READ_SHADOW, cr0);
3029 vmcs_writel(GUEST_CR0, hw_cr0);
3034 static u64 construct_eptp(unsigned long root_hpa)
3039 eptp = VMX_EPT_DEFAULT_MT |
3040 VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
3041 if (enable_ept_ad_bits)
3042 eptp |= VMX_EPT_AD_ENABLE_BIT;
3048 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
3050 unsigned long guest_cr3;
3055 eptp = construct_eptp(cr3);
3056 vmcs_write64(EPT_POINTER, eptp);
3057 guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) :
3058 vcpu->kvm->arch.ept_identity_map_addr;
3059 ept_load_pdptrs(vcpu);
3062 vmx_flush_tlb(vcpu);
3063 vmcs_writel(GUEST_CR3, guest_cr3);
3066 static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
3068 unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ?
3078 if (!nested_vmx_allowed(vcpu))
3080 } else if (to_vmx(vcpu)->nested.vmxon)
3085 if (!is_paging(vcpu)) {
3089 hw_cr4 &= ~X86_CR4_PAE;
3093 vmcs_writel(CR4_READ_SHADOW, cr4);
3094 vmcs_writel(GUEST_CR4, hw_cr4);
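/*
 * CR0/CR4 handling above keeps two views in sync: GUEST_CR0/GUEST_CR4 hold
 * what the hardware actually runs with (hw_cr0/hw_cr4), while the read
 * shadows hold what the guest believes it wrote, with the guest/host masks
 * deciding which bits trap.
 */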
3098 static void vmx_get_segment(struct kvm_vcpu *vcpu,
3101 struct vcpu_vmx *vmx = to_vmx(vcpu);
3104 if (vmx->rmode.vm86_active
3110 || var->selector == vmx_read_guest_seg_selector(vmx, seg))
3112 var->base = vmx_read_guest_seg_base(vmx, seg);
3113 var->selector = vmx_read_guest_seg_selector(vmx, seg);
3116 var->base = vmx_read_guest_seg_base(vmx, seg);
3117 var->limit = vmx_read_guest_seg_limit(vmx, seg);
3118 var->selector = vmx_read_guest_seg_selector(vmx, seg);
3119 ar = vmx_read_guest_seg_ar(vmx, seg);
3120 if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
3122 var->type = ar & 15;
3123 var->s = (ar >> 4) & 1;
3124 var->dpl = (ar >> 5) & 3;
3126 var->avl = (ar >> 12) & 1;
3127 var->l = (ar >> 13) & 1;
3128 var->db = (ar >> 14) & 1;
3129 var->g = (ar >> 15) & 1;
3133 static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
3137 if (to_vmx(vcpu)->rmode.vm86_active) {
3138 vmx_get_segment(vcpu, &s, seg);
3141 return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
3144 static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
3146 if (!is_protmode(vcpu))
3149 if (!is_long_mode(vcpu)
3153 return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
3156 static int vmx_get_cpl(struct kvm_vcpu *vcpu)
3158 struct vcpu_vmx *vmx = to_vmx(vcpu);
3170 vmx->cpl = __vmx_get_cpl(vcpu);
3184 ar = var->type & 15;
3185 ar |= (var->s & 1) << 4;
3186 ar |= (var->dpl & 3) << 5;
3187 ar |= (var->present & 1) << 7;
3188 ar |= (var->avl & 1) << 12;
3189 ar |= (var->l & 1) << 13;
3190 ar |= (var->db & 1) << 14;
3191 ar |= (var->g & 1) << 15;
3197 static void vmx_set_segment(struct kvm_vcpu *vcpu,
3200 struct vcpu_vmx *vmx = to_vmx(vcpu);
3201 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3204 vmx_segment_cache_clear(vmx);
3207 vmcs_write16(sf->selector, var->selector);
3211 vmcs_writel(sf->base, var->base);
3212 vmcs_write32(sf->limit, var->limit);
3213 vmcs_write16(sf->selector, var->selector);
3214 if (vmx->rmode.vm86_active && var->s) {
3219 if (var->base == 0xffff0000 && var->selector == 0xf000)
3220 vmcs_writel(sf->base, 0xf0000);
3223 ar = vmx_segment_access_rights(var);
3239 vmcs_write32(sf->ar_bytes, ar);
3249 if (!enable_unrestricted_guest && vmx->rmode.vm86_active) {
3252 vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
3253 vmcs_write32(GUEST_CS_LIMIT, 0xffff);
3254 if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
3255 vmcs_writel(GUEST_CS_BASE, 0xf0000);
3256 vmcs_write16(GUEST_CS_SELECTOR,
3257 vmcs_readl(GUEST_CS_BASE) >> 4);
3263 fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
3266 vmcs_write16(GUEST_SS_SELECTOR,
3267 vmcs_readl(GUEST_SS_BASE) >> 4);
3268 vmcs_write32(GUEST_SS_LIMIT, 0xffff);
3269 vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
3275 static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
3279 *db = (ar >> 14) & 1;
3280 *l = (ar >> 13) & 1;
3285 dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
3286 dt->address = vmcs_readl(GUEST_IDTR_BASE);
3291 vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
3292 vmcs_writel(GUEST_IDTR_BASE, dt->address);
3297 dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
3298 dt->address = vmcs_readl(GUEST_GDTR_BASE);
3303 vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
3304 vmcs_writel(GUEST_GDTR_BASE, dt->address);
3307 static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
3312 vmx_get_segment(vcpu, &var, seg);
3313 ar = vmx_segment_access_rights(&var);
3317 if (var.limit < 0xffff)
3319 if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3)
3325 static bool code_segment_valid(struct kvm_vcpu *vcpu)
3328 unsigned int cs_rpl;
3335 if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
3339 if (cs.type & AR_TYPE_WRITEABLE_MASK) {
3340 if (cs.dpl > cs_rpl)
3343 if (cs.dpl != cs_rpl)
3353 static bool stack_segment_valid(struct kvm_vcpu *vcpu)
3356 unsigned int ss_rpl;
3363 if (ss.type != 3 && ss.type != 7)
3367 if (ss.dpl != ss_rpl)
3375 static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
3380 vmx_get_segment(vcpu, &var, seg);
3389 if (~var.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK)) {
3400 static bool tr_valid(struct kvm_vcpu *vcpu)
3410 if (tr.type != 3 && tr.type != 11)
3418 static bool ldtr_valid(struct kvm_vcpu *vcpu)
3436 static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
3452 static bool guest_state_valid(struct kvm_vcpu *vcpu)
3455 if (!is_protmode(vcpu)) {
3470 if (!cs_ss_rpl_check(vcpu))
3472 if (!code_segment_valid(vcpu))
3474 if (!stack_segment_valid(vcpu))
3484 if (!tr_valid(vcpu))
3486 if (!ldtr_valid(vcpu))
3497 static int init_rmode_tss(struct kvm *kvm)
3501 int r, idx, ret = 0;
3503 idx = srcu_read_lock(&kvm->srcu);
3528 srcu_read_unlock(&kvm->srcu, idx);
3532 static int init_rmode_identity_map(struct kvm *kvm)
3535 pfn_t identity_map_pfn;
3542 "haven't been allocated!\n");
3545 if (likely(kvm->arch.ept_identity_pagetable_done))
3548 identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
3549 idx = srcu_read_lock(&kvm->srcu);
3558 &tmp, i * sizeof(tmp), sizeof(tmp));
3562 kvm->arch.ept_identity_pagetable_done = true;
3565 srcu_read_unlock(&kvm->srcu, idx);
3569 static void seg_setup(int seg)
3571 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3574 vmcs_write16(sf->selector, 0);
3575 vmcs_writel(sf->base, 0);
3576 vmcs_write32(sf->limit, 0xffff);
3577 if (enable_unrestricted_guest) {
3584 vmcs_write32(sf->ar_bytes, ar);
3587 static int alloc_apic_access_page(struct kvm *kvm)
3594 if (kvm->arch.apic_access_page)
3596 kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
3597 kvm_userspace_mem.flags = 0;
3598 kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
3599 kvm_userspace_mem.memory_size = PAGE_SIZE;
3605 if (is_error_page(page)) {
3616 static int alloc_identity_pagetable(struct kvm *kvm)
3623 if (kvm->arch.ept_identity_pagetable)
3625 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
3626 kvm_userspace_mem.flags = 0;
3627 kvm_userspace_mem.guest_phys_addr =
3628 kvm->arch.ept_identity_map_addr;
3629 kvm_userspace_mem.memory_size = PAGE_SIZE;
3635 if (is_error_page(page)) {
3640 kvm->arch.ept_identity_pagetable = page;
3646 static void allocate_vpid(struct vcpu_vmx *vmx)
3653 spin_lock(&vmx_vpid_lock);
3655 if (vpid < VMX_NR_VPIDS) {
3659 spin_unlock(&vmx_vpid_lock);
3662 static void free_vpid(struct vcpu_vmx *vmx)
3666 spin_lock(&vmx_vpid_lock);
3669 spin_unlock(&vmx_vpid_lock);
3672 static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
3674 int f = sizeof(unsigned long);
3676 if (!cpu_has_vmx_msr_bitmap())
3684 if (msr <= 0x1fff) {
3687 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
3694 static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
3697 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
3698 __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
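/*
 * vmx_set_constant_host_state() fills in the host-state VMCS fields that
 * never change for the lifetime of the VMCS (control registers, segment
 * selectors, IDT base, SYSENTER MSRs, host PAT), so vcpu_run does not have
 * to rewrite them on every entry.
 */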
3707 static void vmx_set_constant_host_state(void)
3713 vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS);
3714 vmcs_writel(HOST_CR4, read_cr4());
3715 vmcs_writel(HOST_CR3, read_cr3());
3718 #ifdef CONFIG_X86_64
3724 vmcs_write16(HOST_DS_SELECTOR, 0);
3725 vmcs_write16(HOST_ES_SELECTOR, 0);
3733 native_store_idt(&dt);
3734 vmcs_writel(HOST_IDTR_BASE, dt.address);
3736 vmcs_writel(HOST_RIP, vmx_return);
3739 vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
3741 vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl);
3743 if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
3745 vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
3749 static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
3754 if (is_guest_mode(&vmx->vcpu))
3755 vmx->vcpu.arch.cr4_guest_owned_bits &=
3756 ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask;
3757 vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
3762 u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
3763 if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) {
3764 exec_control &= ~CPU_BASED_TPR_SHADOW;
3765 #ifdef CONFIG_X86_64
3766 exec_control |= CPU_BASED_CR8_STORE_EXITING |
3767 CPU_BASED_CR8_LOAD_EXITING;
3771 exec_control |= CPU_BASED_CR3_STORE_EXITING |
3772 CPU_BASED_CR3_LOAD_EXITING |
3773 CPU_BASED_INVLPG_EXITING;
3774 return exec_control;
3777 static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
3779 u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
3780 if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
3781 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
3783 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
3785 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
3786 enable_unrestricted_guest = 0;
3788 exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
3790 if (!enable_unrestricted_guest)
3791 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
3793 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
3794 return exec_control;
3797 static void ept_set_mmio_spte_mask(void)
3811 static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
3813 #ifdef CONFIG_X86_64
3819 vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
3820 vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
3822 if (cpu_has_vmx_msr_bitmap())
3823 vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
3825 vmcs_write64(VMCS_LINK_POINTER, -1ull);
3828 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
3829 vmcs_config.pin_based_exec_ctrl);
3831 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
3833 if (cpu_has_secondary_exec_ctrls()) {
3834 vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
3835 vmx_secondary_exec_control(vmx));
3839 vmcs_write32(PLE_GAP, ple_gap);
3840 vmcs_write32(PLE_WINDOW, ple_window);
3843 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
3844 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
3845 vmcs_write32(CR3_TARGET_COUNT, 0);
3847 vmcs_write16(HOST_FS_SELECTOR, 0);
3848 vmcs_write16(HOST_GS_SELECTOR, 0);
3849 vmx_set_constant_host_state();
3850 #ifdef CONFIG_X86_64
3860 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
3861 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
3863 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
3866 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
3867 u32 msr_low, msr_high;
3870 host_pat = msr_low | ((u64) msr_high << 32);
3872 vmcs_write64(GUEST_IA32_PAT, host_pat);
3874 vmx->vcpu.arch.pat = host_pat;
3878 u32 index = vmx_msr_index[i];
3879 u32 data_low, data_high;
3882 if (rdmsr_safe(index, &data_low, &data_high) < 0)
3884 if (wrmsr_safe(index, data_low, data_high) < 0)
3892 vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
3895 vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl);
3897 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
3898 set_cr4_guest_host_mask(vmx);
3905 static int vmx_vcpu_reset(
struct kvm_vcpu *vcpu)
3907 struct vcpu_vmx *vmx = to_vmx(vcpu);
3913 vmx->
rmode.vm86_active = 0;
3920 if (kvm_vcpu_is_bsp(&vmx->
vcpu))
3928 vmx_segment_cache_clear(vmx);
3935 if (kvm_vcpu_is_bsp(&vmx->
vcpu)) {
3936 vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
3937 vmcs_writel(GUEST_CS_BASE, 0x000f0000);
3939 vmcs_write16(GUEST_CS_SELECTOR, vmx->
vcpu.arch.sipi_vector << 8);
3940 vmcs_writel(GUEST_CS_BASE, vmx->
vcpu.arch.sipi_vector << 12);
3949 vmcs_write16(GUEST_TR_SELECTOR, 0);
3950 vmcs_writel(GUEST_TR_BASE, 0);
3951 vmcs_write32(GUEST_TR_LIMIT, 0xffff);
3952 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
3954 vmcs_write16(GUEST_LDTR_SELECTOR, 0);
3955 vmcs_writel(GUEST_LDTR_BASE, 0);
3956 vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
3957 vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
3959 vmcs_write32(GUEST_SYSENTER_CS, 0);
3960 vmcs_writel(GUEST_SYSENTER_ESP, 0);
3961 vmcs_writel(GUEST_SYSENTER_EIP, 0);
3963 vmcs_writel(GUEST_RFLAGS, 0x02);
3964 if (kvm_vcpu_is_bsp(&vmx->vcpu))
3965 kvm_rip_write(vcpu, 0xfff0);
3967 kvm_rip_write(vcpu, 0);
3970 vmcs_writel(GUEST_GDTR_BASE, 0);
3971 vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
3973 vmcs_writel(GUEST_IDTR_BASE, 0);
3974 vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
3976 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
3977 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
3978 vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
3981 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
3985 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
3987 if (cpu_has_vmx_tpr_shadow()) {
3988 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
3989 if (vm_need_tpr_shadow(vmx->vcpu.kvm))
3990 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
3992 vmcs_write32(TPR_THRESHOLD, 0);
3995 if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
3996 vmcs_write64(APIC_ACCESS_ADDR,
4000 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
4003 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4004 vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu));
4005 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4006 vmx_set_cr4(&vmx->vcpu, 0);
4007 vmx_set_efer(&vmx->vcpu, 0);
4008 vmx_fpu_activate(&vmx->vcpu);
4009 update_exception_bitmap(&vmx->vcpu);
4011 vpid_sync_context(vmx);
4026 static bool nested_exit_on_intr(struct kvm_vcpu *vcpu)
4028 return get_vmcs12(vcpu)->pin_based_vm_exec_control &
4029 PIN_BASED_EXT_INTR_MASK;
4032 static void enable_irq_window(struct kvm_vcpu *vcpu)
4034 u32 cpu_based_vm_exec_control;
4035 if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
4045 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
4046 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
4047 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
4050 static void enable_nmi_window(struct kvm_vcpu *vcpu)
4052 u32 cpu_based_vm_exec_control;
4054 if (!cpu_has_virtual_nmis()) {
4055 enable_irq_window(vcpu);
4059 if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
4060 enable_irq_window(vcpu);
4063 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
4064 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
4065 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
4068 static void vmx_inject_irq(struct kvm_vcpu *vcpu)
4070 struct vcpu_vmx *vmx = to_vmx(vcpu);
4072 int irq = vcpu->arch.interrupt.nr;
4074 trace_kvm_inj_virq(irq);
4076 ++vcpu->stat.irq_injections;
4077 if (vmx->rmode.vm86_active) {
4079 if (vcpu->arch.interrupt.soft)
4080 inc_eip = vcpu->arch.event_exit_inst_len;
4085 intr = irq | INTR_INFO_VALID_MASK;
4086 if (vcpu->arch.interrupt.soft) {
4087 intr |= INTR_TYPE_SOFT_INTR;
4088 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
4089 vmx->vcpu.arch.event_exit_inst_len);
4091 intr |= INTR_TYPE_EXT_INTR;
4092 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
4095 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
4097 struct vcpu_vmx *vmx = to_vmx(vcpu);
4099 if (is_guest_mode(vcpu))
4102 if (!cpu_has_virtual_nmis()) {
4115 ++vcpu->stat.nmi_injections;
4117 if (vmx->rmode.vm86_active) {
4122 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
4123 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
4126 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
4131 return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
4132 (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
4133 | GUEST_INTR_STATE_NMI));
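/*
 * NMI injection is held off while the guest interruptibility state
 * reports blocking by MOV SS, by STI, or by an NMI already in service -
 * exactly the three GUEST_INTR_STATE_* bits tested above.
 */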
4136 static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
4138 if (!cpu_has_virtual_nmis())
4139 return to_vmx(vcpu)->soft_vnmi_blocked;
4142 return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
4145 static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
4147 struct vcpu_vmx *vmx = to_vmx(vcpu);
4149 if (!cpu_has_virtual_nmis()) {
4157 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
4158 GUEST_INTR_STATE_NMI);
4160 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
4161 GUEST_INTR_STATE_NMI);
4165 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
4167 if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) {
4168 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4169 if (to_vmx(vcpu)->nested.nested_run_pending ||
4171 VECTORING_INFO_VALID_MASK))
4173 nested_vmx_vmexit(vcpu);
4180 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
4181 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
4184 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
4188 .slot = TSS_PRIVATE_MEMSLOT,
4189 .guest_phys_addr = addr,
4198 if (!init_rmode_tss(kvm))
4204 static int handle_rmode_exception(struct kvm_vcpu *vcpu,
4231 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
4232 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
4257 static void kvm_machine_check(void)
4259 #if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
4269 static int handle_machine_check(struct kvm_vcpu *vcpu)
4277 struct vcpu_vmx *vmx = to_vmx(vcpu);
4280 unsigned long cr2, rip, dr6;
4287 if (is_machine_check(intr_info))
4288 return handle_machine_check(vcpu);
4290 if ((vect_info & VECTORING_INFO_VALID_MASK) &&
4291 !is_page_fault(intr_info)) {
4294 vcpu->run->internal.ndata = 2;
4295 vcpu->run->internal.data[0] = vect_info;
4296 vcpu->run->internal.data[1] = intr_info;
4300 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
4303 if (is_no_device(intr_info)) {
4304 vmx_fpu_activate(vcpu);
4308 if (is_invalid_opcode(intr_info)) {
4316 if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
4317 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
4318 if (is_page_fault(intr_info)) {
4321 cr2 = vmcs_readl(EXIT_QUALIFICATION);
4322 trace_kvm_page_fault(cr2, error_code);
4324 if (kvm_event_needs_reinjection(vcpu))
4329 if (vmx->rmode.vm86_active &&
4330 handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
4332 if (vcpu->arch.halt_request) {
4333 vcpu->arch.halt_request = 0;
4339 ex_no = intr_info & INTR_INFO_VECTOR_MASK;
4342 dr6 = vmcs_readl(EXIT_QUALIFICATION);
4350 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
4358 vmx->vcpu.arch.event_exit_inst_len =
4359 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
4361 rip = kvm_rip_read(vcpu);
4362 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
4363 kvm_run->debug.arch.exception = ex_no;
4367 kvm_run->ex.exception = ex_no;
4374 static int handle_external_interrupt(struct kvm_vcpu *vcpu)
4376 ++vcpu->stat.irq_exits;
4380 static int handle_triple_fault(struct kvm_vcpu *vcpu)
4386 static int handle_io(struct kvm_vcpu *vcpu)
4388 unsigned long exit_qualification;
4392 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4393 string = (exit_qualification & 16) != 0;
4394 in = (exit_qualification & 8) != 0;
4396 ++vcpu->stat.io_exits;
4401 port = exit_qualification >> 16;
4402 size = (exit_qualification & 7) + 1;
4403 skip_emulated_instruction(vcpu);
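/*
 * I/O exit qualification layout relied on above (per the VMX encoding):
 *   bits 2:0   access size minus one  -> size = (qual & 7) + 1
 *   bit  3     direction, 1 = IN      -> in = (qual & 8) != 0
 *   bit  4     string instruction     -> string = (qual & 16) != 0
 *   bits 31:16 port number            -> port = qual >> 16
 */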
4414 hypercall[0] = 0x0f;
4415 hypercall[1] = 0x01;
4416 hypercall[2] = 0xc1;
4420 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
4422 if (to_vmx(vcpu)->nested.vmxon &&
4423 ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
4426 if (is_guest_mode(vcpu)) {
4435 (vcpu->arch.cr0 & ~vcpu->arch.cr0_guest_owned_bits)))
4437 vmcs_writel(CR0_READ_SHADOW, val);
4443 static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
4445 if (is_guest_mode(vcpu)) {
4447 (vcpu->arch.cr4 & ~vcpu->arch.cr4_guest_owned_bits)))
4449 vmcs_writel(CR4_READ_SHADOW, val);
4456 static void handle_clts(struct kvm_vcpu *vcpu)
4458 if (is_guest_mode(vcpu)) {
4464 vmcs_writel(CR0_READ_SHADOW,
4468 vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
4471 static int handle_cr(struct kvm_vcpu *vcpu)
4473 unsigned long exit_qualification, val;
4478 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4479 cr = exit_qualification & 15;
4480 reg = (exit_qualification >> 8) & 15;
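/*
 * CR-access exit qualification, as decoded above: bits 3:0 give the
 * control register number, bits 11:8 the general-purpose register, and
 * bits 5:4 the access type switched on below (0 = MOV to CR, 1 = MOV
 * from CR, 2 = CLTS, 3 = LMSW, whose source operand is taken from
 * bits 31:16 via LMSW_SOURCE_DATA_SHIFT).
 */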
4481 switch ((exit_qualification >> 4) & 3) {
4483 val = kvm_register_read(vcpu, reg);
4487 err = handle_set_cr0(vcpu, val);
4495 err = handle_set_cr4(vcpu, val);
4500 u8 cr8 = kvm_register_read(vcpu, reg);
4503 if (irqchip_in_kernel(vcpu->kvm))
4505 if (cr8_prev <= cr8)
4515 skip_emulated_instruction(vcpu);
4516 vmx_fpu_activate(vcpu);
4521 val = kvm_read_cr3(vcpu);
4522 kvm_register_write(vcpu, reg, val);
4524 skip_emulated_instruction(vcpu);
4528 kvm_register_write(vcpu, reg, val);
4530 skip_emulated_instruction(vcpu);
4535 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
4539 skip_emulated_instruction(vcpu);
4544 vcpu->run->exit_reason = 0;
4545 vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
4546 (int)(exit_qualification >> 4) & 3, cr);
4550 static int handle_dr(struct kvm_vcpu *vcpu)
4552 unsigned long exit_qualification;
4558 dr = vmcs_readl(GUEST_DR7);
4566 vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
4567 vcpu->run->debug.arch.dr7 = dr;
4568 vcpu->run->debug.arch.pc =
4569 vmcs_readl(GUEST_CS_BASE) +
4570 vmcs_readl(GUEST_RIP);
4575 vcpu->arch.dr7 &= ~DR7_GD;
4577 vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
4583 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4584 dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
4585 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
4586 if (exit_qualification & TYPE_MOV_FROM_DR) {
4589 kvm_register_write(vcpu, reg, val);
4592 skip_emulated_instruction(vcpu);
4596 static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
4598 vmcs_writel(GUEST_DR7, val);
4601 static int handle_cpuid(struct kvm_vcpu *vcpu)
4607 static int handle_rdmsr(struct kvm_vcpu *vcpu)
4612 if (vmx_get_msr(vcpu, ecx, &data)) {
4614 kvm_inject_gp(vcpu, 0);
4623 skip_emulated_instruction(vcpu);
4627 static int handle_wrmsr(struct kvm_vcpu *vcpu)
4633 if (vmx_set_msr(vcpu, ecx, data) != 0) {
4635 kvm_inject_gp(vcpu, 0);
4640 skip_emulated_instruction(vcpu);
4644 static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
4650 static int handle_interrupt_window(struct kvm_vcpu *vcpu)
4652 u32 cpu_based_vm_exec_control;
4655 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
4656 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
4657 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
4661 ++vcpu->stat.irq_window_exits;
4667 if (!irqchip_in_kernel(vcpu->kvm) &&
4668 vcpu->run->request_interrupt_window &&
4676 static int handle_halt(struct kvm_vcpu *vcpu)
4678 skip_emulated_instruction(vcpu);
4682 static int handle_vmcall(struct kvm_vcpu *vcpu)
4684 skip_emulated_instruction(vcpu);
4689 static int handle_invd(struct kvm_vcpu *vcpu)
4694 static int handle_invlpg(struct kvm_vcpu *vcpu)
4696 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4699 skip_emulated_instruction(vcpu);
4703 static int handle_rdpmc(struct kvm_vcpu *vcpu)
4713 static int handle_wbinvd(struct kvm_vcpu *vcpu)
4715 skip_emulated_instruction(vcpu);
4720 static int handle_xsetbv(struct kvm_vcpu *vcpu)
4722 u64 new_bv = kvm_read_edx_eax(vcpu);
4726 skip_emulated_instruction(vcpu);
4730 static int handle_apic_access(struct kvm_vcpu *vcpu)
4733 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4736 access_type = exit_qualification & APIC_ACCESS_TYPE;
4737 offset = exit_qualification & APIC_ACCESS_OFFSET;
4743 if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
4746 skip_emulated_instruction(vcpu);
4753 static int handle_task_switch(struct kvm_vcpu *vcpu)
4755 struct vcpu_vmx *vmx = to_vmx(vcpu);
4756 unsigned long exit_qualification;
4757 bool has_error_code = false;
4766 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4768 reason = (u32)exit_qualification >> 30;
4771 case INTR_TYPE_NMI_INTR:
4772 vcpu->arch.nmi_injected = false;
4773 vmx_set_nmi_mask(vcpu, true);
4775 case INTR_TYPE_EXT_INTR:
4776 case INTR_TYPE_SOFT_INTR:
4777 kvm_clear_interrupt_queue(vcpu);
4779 case INTR_TYPE_HARD_EXCEPTION:
4781 VECTORING_INFO_DELIVER_CODE_MASK) {
4782 has_error_code = true;
4784 vmcs_read32(IDT_VECTORING_ERROR_CODE);
4787 case INTR_TYPE_SOFT_EXCEPTION:
4788 kvm_clear_exception_queue(vcpu);
4794 tss_selector = exit_qualification;
4796 if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
4797 type != INTR_TYPE_EXT_INTR &&
4798 type != INTR_TYPE_NMI_INTR))
4799 skip_emulated_instruction(vcpu);
4802 type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason,
4806 vcpu->run->internal.ndata = 0;
4811 vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
4821 static int handle_ept_violation(struct kvm_vcpu *vcpu)
4823 unsigned long exit_qualification;
4828 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4830 if (exit_qualification & (1 << 6)) {
4835 gla_validity = (exit_qualification >> 7) & 0x3;
4836 if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
4839 (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
4840 vmcs_readl(GUEST_LINEAR_ADDRESS));
4842 (long unsigned int)exit_qualification);
4848 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
4849 trace_kvm_page_fault(gpa, exit_qualification);
4852 error_code = exit_qualification & (1U << 1);
4854 error_code |= (exit_qualification >> 3) & 0x1;
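/*
 * The page-fault style error code handed to the MMU is assembled from
 * the EPT violation qualification: bit 1 is set when the access was a
 * write, and bit 3 reports whether the guest-physical address was
 * readable under the EPT tables, which is folded in as the "present"
 * flag above.
 */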
4865 mask |= (1ULL << i);
4870 else if (level == 2) {
4871 if (spte & (1ULL << 7))
4882 static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
4885 printk(KERN_ERR "%s: spte 0x%llx level %d\n", __func__, spte, level);
4894 if (!cpu_has_vmx_ept_execute_only())
4899 u64 rsvd_bits = spte & ept_rsvd_mask(spte, level);
4901 if (rsvd_bits != 0) {
4903 __func__, rsvd_bits);
4907 if (level == 1 || (level == 2 && (spte & (1ULL << 7)))) {
4908 u64 ept_mem_type = (spte & 0x38) >> 3;
4910 if (ept_mem_type == 2 || ept_mem_type == 3 ||
4911 ept_mem_type == 7) {
4913 __func__, ept_mem_type);
4920 static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
4923 int nr_sptes, i, ret;
4926 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
4942 ept_misconfig_inspect_spte(vcpu, sptes[i-1], i);
4950 static int handle_nmi_window(struct kvm_vcpu *vcpu)
4952 u32 cpu_based_vm_exec_control;
4955 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
4956 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
4957 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
4958 ++vcpu->stat.nmi_window_exits;
4964 static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
4966 struct vcpu_vmx *vmx = to_vmx(vcpu);
4970 bool intr_window_requested;
4971 unsigned count = 130;
4973 cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
4974 intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
4976 while (!guest_state_valid(vcpu) && count-- != 0) {
4977 if (intr_window_requested && vmx_interrupt_allowed(vcpu))
4978 return handle_interrupt_window(&vmx->vcpu);
4983 err = emulate_instruction(vcpu, 0);
4993 vcpu->run->internal.ndata = 0;
5012 static int handle_pause(struct kvm_vcpu *vcpu)
5014 skip_emulated_instruction(vcpu);
5020 static int handle_invalid_op(struct kvm_vcpu *vcpu)
5040 static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
5044 if (item->vmptr == vmx->nested.current_vmptr) {
5045 list_move(&item->list, &vmx->nested.vmcs02_pool);
5054 list_move(&item->list, &vmx->nested.vmcs02_pool);
5063 item->vmcs02.vmcs = alloc_vmcs();
5064 if (!item->vmcs02.vmcs) {
5068 loaded_vmcs_init(&item->vmcs02);
5070 list_add(&(item->list), &(vmx->nested.vmcs02_pool));
5071 vmx->nested.vmcs02_num++;
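/*
 * Rough shape of the vmcs02 pool: entries are keyed by the L1 vmptr and
 * kept in LRU order.  A hit moves the entry to the head of the list and
 * reuses its VMCS; otherwise (including when an old entry is recycled
 * once the pool is full) a VMCS is allocated, initialised with
 * loaded_vmcs_init(), linked in, and vmcs02_num is bumped.
 */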
5080 if (item->vmptr == vmptr) {
5081 free_loaded_vmcs(&item->vmcs02);
5084 vmx->nested.vmcs02_num--;
5094 static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
5099 free_loaded_vmcs(&item->vmcs02);
5103 vmx->nested.vmcs02_num = 0;
5106 free_loaded_vmcs(&vmx->vmcs01);
5117 static int handle_vmon(struct kvm_vcpu *vcpu)
5120 struct vcpu_vmx *vmx = to_vmx(vcpu);
5127 if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE) ||
5128 !kvm_read_cr0_bits(vcpu, X86_CR0_PE) ||
5129 (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
5135 if (is_long_mode(vcpu) && !cs.l) {
5140 if (vmx_get_cpl(vcpu)) {
5141 kvm_inject_gp(vcpu, 0);
5145 INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
5146 vmx->nested.vmcs02_num = 0;
5148 vmx->nested.vmxon = true;
5150 skip_emulated_instruction(vcpu);
5159 static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
5162 struct vcpu_vmx *vmx = to_vmx(vcpu);
5164 if (!vmx->nested.vmxon) {
5170 if ((vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
5171 (is_long_mode(vcpu) && !cs.l)) {
5176 if (vmx_get_cpl(vcpu)) {
5177 kvm_inject_gp(vcpu, 0);
5188 static void free_nested(struct vcpu_vmx *vmx)
5192 vmx->nested.vmxon = false;
5193 if (vmx->nested.current_vmptr != -1ull) {
5195 nested_release_page(vmx->nested.current_vmcs12_page);
5196 vmx->nested.current_vmptr = -1ull;
5200 if (vmx->nested.apic_access_page) {
5201 nested_release_page(vmx->nested.apic_access_page);
5202 vmx->nested.apic_access_page = 0;
5205 nested_free_all_saved_vmcss(vmx);
5209 static int handle_vmoff(struct kvm_vcpu *vcpu)
5211 if (!nested_vmx_check_permission(vcpu))
5213 free_nested(to_vmx(vcpu));
5214 skip_emulated_instruction(vcpu);
5224 static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
5225 unsigned long exit_qualification,
5226 u32 vmx_instruction_info, gva_t *ret)
5236 int scaling = vmx_instruction_info & 3;
5237 int addr_size = (vmx_instruction_info >> 7) & 7;
5238 bool is_reg = vmx_instruction_info & (1u << 10);
5239 int seg_reg = (vmx_instruction_info >> 15) & 7;
5240 int index_reg = (vmx_instruction_info >> 18) & 0xf;
5241 bool index_is_valid = !(vmx_instruction_info & (1u << 22));
5242 int base_reg = (vmx_instruction_info >> 23) & 0xf;
5243 bool base_is_valid = !(vmx_instruction_info & (1u << 27));
5252 *ret = vmx_get_segment_base(vcpu, seg_reg);
5254 *ret += kvm_register_read(vcpu, base_reg);
5256 *ret += kvm_register_read(vcpu, index_reg)<<scaling;
5257 *ret += exit_qualification;
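/*
 * The VMX instruction-information field decoded above packs the memory
 * operand as: bits 1:0 scaling, bits 9:7 address size, bit 10 set for a
 * register (non-memory) operand, bits 17:15 segment register, bits 21:18
 * index register (bit 22 = index invalid), bits 26:23 base register
 * (bit 27 = base invalid).  The effective address is then segment base +
 * base + (index << scaling) + displacement, with the displacement
 * delivered in the exit qualification.
 */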
5276 static void nested_vmx_succeed(struct kvm_vcpu *vcpu)
5278 vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
5283 static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
5285 vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
5291 static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
5292 u32 vm_instruction_error)
5294 if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
5299 nested_vmx_failInvalid(vcpu);
5302 vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
5306 get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
5310 static int handle_vmclear(struct kvm_vcpu *vcpu)
5312 struct vcpu_vmx *vmx = to_vmx(vcpu);
5315 struct vmcs12 *vmcs12;
5319 if (!nested_vmx_check_permission(vcpu))
5322 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
5323 vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
5327 sizeof(vmptr), &e)) {
5333 nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
5334 skip_emulated_instruction(vcpu);
5338 if (vmptr == vmx->nested.current_vmptr) {
5340 nested_release_page(vmx->nested.current_vmcs12_page);
5341 vmx->nested.current_vmptr = -1ull;
5345 page = nested_get_page(vcpu, vmptr);
5357 vmcs12 = kmap(page);
5360 nested_release_page(page);
5362 nested_free_vmcs02(vmx, vmptr);
5364 skip_emulated_instruction(vcpu);
5365 nested_vmx_succeed(vcpu);
5372 static int handle_vmlaunch(struct kvm_vcpu *vcpu)
5374 return nested_vmx_run(vcpu, true);
5378 static int handle_vmresume(struct kvm_vcpu *vcpu)
5381 return nested_vmx_run(vcpu, false);
5395 return (field >> 13) & 0x3;
5398 static inline int vmcs_field_readonly(unsigned long field)
5400 return (((field >> 10) & 0x3) == 1);
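/*
 * VMCS field encodings carry their own metadata: bits 14:13, returned
 * just above, give the access width (0 = 16-bit, 1 = 64-bit, 2 = 32-bit,
 * 3 = natural width), and bits 11:10 give the field type, where type 1
 * is the read-only data group - hence the comparison with 1 in
 * vmcs_field_readonly().
 */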
5410 static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
5411 unsigned long field, u64 *ret)
5413 short offset = vmcs_field_to_offset(field);
5419 p = ((char *)(get_vmcs12(vcpu))) + offset;
5443 static int nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu)
5445 struct vcpu_vmx *vmx = to_vmx(vcpu);
5446 if (vmx->nested.current_vmptr == -1ull) {
5447 nested_vmx_failInvalid(vcpu);
5448 skip_emulated_instruction(vcpu);
5454 static int handle_vmread(struct kvm_vcpu *vcpu)
5456 unsigned long field;
5458 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5459 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5462 if (!nested_vmx_check_permission(vcpu) ||
5463 !nested_vmx_check_vmcs12(vcpu))
5467 field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
5469 if (!vmcs12_read_any(vcpu, field, &field_value)) {
5470 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
5471 skip_emulated_instruction(vcpu);
5479 if (vmx_instruction_info & (1u << 10)) {
5480 kvm_register_write(vcpu, (((vmx_instruction_info) >> 3) & 0xf),
5483 if (get_vmx_mem_address(vcpu, exit_qualification,
5484 vmx_instruction_info, &gva))
5488 &field_value, (is_long_mode(vcpu) ? 8 : 4), NULL);
5491 nested_vmx_succeed(vcpu);
5492 skip_emulated_instruction(vcpu);
5497 static int handle_vmwrite(struct kvm_vcpu *vcpu)
5499 unsigned long field;
5501 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5502 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5511 u64 field_value = 0;
5514 if (!nested_vmx_check_permission(vcpu) ||
5515 !nested_vmx_check_vmcs12(vcpu))
5518 if (vmx_instruction_info & (1u << 10))
5519 field_value = kvm_register_read(vcpu,
5520 (((vmx_instruction_info) >> 3) & 0xf));
5522 if (get_vmx_mem_address(vcpu, exit_qualification,
5523 vmx_instruction_info, &gva))
5526 &field_value, (is_long_mode(vcpu) ? 8 : 4), &e)) {
5533 field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
5534 if (vmcs_field_readonly(field)) {
5535 nested_vmx_failValid(vcpu,
5536 VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
5537 skip_emulated_instruction(vcpu);
5541 offset = vmcs_field_to_offset(field);
5543 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
5544 skip_emulated_instruction(vcpu);
5547 p = ((char *) get_vmcs12(vcpu)) + offset;
5551 *(u16 *)p = field_value;
5554 *(u32 *)p = field_value;
5557 *(u64 *)p = field_value;
5563 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
5564 skip_emulated_instruction(vcpu);
5568 nested_vmx_succeed(vcpu);
5569 skip_emulated_instruction(vcpu);
5574 static int handle_vmptrld(struct kvm_vcpu *vcpu)
5576 struct vcpu_vmx *vmx = to_vmx(vcpu);
5581 if (!nested_vmx_check_permission(vcpu))
5584 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
5585 vmcs_read32(VMX_INSTRUCTION_INFO), &gva))
5589 sizeof(vmptr), &e)) {
5595 nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
5596 skip_emulated_instruction(vcpu);
5600 if (vmx->nested.current_vmptr != vmptr) {
5601 struct vmcs12 *new_vmcs12;
5603 page = nested_get_page(vcpu, vmptr);
5605 nested_vmx_failInvalid(vcpu);
5606 skip_emulated_instruction(vcpu);
5609 new_vmcs12 = kmap(page);
5612 nested_release_page_clean(page);
5613 nested_vmx_failValid(vcpu,
5614 VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
5615 skip_emulated_instruction(vcpu);
5618 if (vmx->nested.current_vmptr != -1ull) {
5620 nested_release_page(vmx->nested.current_vmcs12_page);
5623 vmx->nested.current_vmptr = vmptr;
5624 vmx->nested.current_vmcs12 = new_vmcs12;
5628 nested_vmx_succeed(vcpu);
5629 skip_emulated_instruction(vcpu);
5634 static int handle_vmptrst(struct kvm_vcpu *vcpu)
5636 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5637 u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5641 if (!nested_vmx_check_permission(vcpu))
5644 if (get_vmx_mem_address(vcpu, exit_qualification,
5645 vmx_instruction_info, &vmcs_gva))
5649 (void *)&to_vmx(vcpu)->nested.current_vmptr,
5654 nested_vmx_succeed(vcpu);
5655 skip_emulated_instruction(vcpu);
5664 static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
5703 static const int kvm_vmx_max_exit_handlers =
5712 static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
5713 struct vmcs12 *vmcs12, u32 exit_reason)
5718 if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS))
5729 if (msr_index >= 0xc0000000) {
5730 msr_index -= 0xc0000000;
5735 if (msr_index < 1024*8) {
5738 return 1 & (b >> (msr_index & 7));
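/*
 * MSR bitmap layout assumed here: one bit per MSR, with the low MSRs
 * 0x00000000-0x00001fff and the high MSRs 0xc0000000-0xc0001fff each
 * covered by 1024 bytes (hence the 1024*8 bound after rebasing by
 * 0xc0000000), and separate read and write halves of the 4K page chosen
 * from the exit reason.
 */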
5748 static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
5749 struct vmcs12 *vmcs12)
5751 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5752 int cr = exit_qualification & 15;
5753 int reg = (exit_qualification >> 8) & 15;
5754 unsigned long val = kvm_register_read(vcpu, reg);
5756 switch ((exit_qualification >> 4) & 3) {
5774 if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
5783 if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
5797 CPU_BASED_CR3_STORE_EXITING)
5802 CPU_BASED_CR8_STORE_EXITING)
5829 static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
5831 u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
5832 u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
5833 struct vcpu_vmx *vmx = to_vmx(vcpu);
5834 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5836 if (vmx->nested.nested_run_pending)
5841 vmcs_read32(VM_INSTRUCTION_ERROR));
5845 switch (exit_reason) {
5847 if (!is_exception(intr_info))
5849 else if (is_page_fault(intr_info))
5852 (1u << (intr_info & INTR_INFO_VECTOR_MASK));
5871 return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
5875 return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
5877 return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
5879 return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
5891 return nested_vmx_exit_handled_cr(vcpu, vmcs12);
5893 return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
5899 return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
5903 return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
5905 return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
5907 return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
5908 nested_cpu_has2(vmcs12,
5909 SECONDARY_EXEC_PAUSE_LOOP_EXITING);
5915 return nested_cpu_has2(vmcs12,
5916 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
5921 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
5931 *info1 = vmcs_readl(EXIT_QUALIFICATION);
5932 *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
5939 static int vmx_handle_exit(struct kvm_vcpu *vcpu)
5941 struct vcpu_vmx *vmx = to_vmx(vcpu);
5947 return handle_invalid_guest_state(vcpu);
5954 if (vmx->nested.nested_run_pending)
5959 vmx->nested.nested_run_pending = 1;
5961 vmx->nested.nested_run_pending = 0;
5963 if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
5964 nested_vmx_vmexit(vcpu);
5970 vcpu->run->fail_entry.hardware_entry_failure_reason
5977 vcpu->run->fail_entry.hardware_entry_failure_reason
5978 = vmcs_read32(VM_INSTRUCTION_ERROR);
5982 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
5987 "(0x%x) and exit reason is 0x%x\n",
5988 __func__, vectoring_info, exit_reason);
5991 !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
5992 get_vmcs12(vcpu), vcpu)))) {
5993 if (vmx_interrupt_allowed(vcpu)) {
5996 vcpu->arch.nmi_pending) {
6004 "state on VCPU %d after 1 s timeout\n",
6010 if (exit_reason < kvm_vmx_max_exit_handlers
6011 && kvm_vmx_exit_handlers[exit_reason])
6020 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
6022 if (irr == -1 || tpr < irr) {
6023 vmcs_write32(TPR_THRESHOLD, 0);
6027 vmcs_write32(TPR_THRESHOLD, irr);
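/*
 * TPR_THRESHOLD arms a TPR-below-threshold exit: when no interrupt is
 * pending, or the pending vector already outranks the guest's TPR (so it
 * can be delivered immediately), the threshold is parked at 0; otherwise
 * it is set to the pending priority so that the guest lowering its TPR
 * below it re-enters KVM (see handle_tpr_below_threshold() above).
 */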
6030 static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
6042 if (is_machine_check(exit_intr_info))
6043 kvm_machine_check();
6046 if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
6047 (exit_intr_info & INTR_INFO_VALID_MASK)) {
6054 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
6059 bool idtv_info_valid;
6063 if (cpu_has_virtual_nmis()) {
6070 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
6071 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
6072 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
6083 if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
6084 vector != DF_VECTOR && !idtv_info_valid)
6085 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
6086 GUEST_INTR_STATE_NMI);
6089 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
6090 & GUEST_INTR_STATE_NMI);
6096 static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
6098 int instr_len_field,
6099 int error_code_field)
6103 bool idtv_info_valid;
6105 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
6107 vmx->vcpu.arch.nmi_injected = false;
6108 kvm_clear_exception_queue(&vmx->vcpu);
6109 kvm_clear_interrupt_queue(&vmx->vcpu);
6111 if (!idtv_info_valid)
6116 vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
6117 type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
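/*
 * IDT-vectoring info layout relied on here: bits 7:0 carry the vector,
 * bits 10:8 the event type, bit 11 flags a delivered error code and
 * bit 31 marks the field valid; the switch below requeues the
 * interrupted event so it can be reinjected on the next VM entry.
 */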
6120 case INTR_TYPE_NMI_INTR:
6121 vmx->vcpu.arch.nmi_injected = true;
6127 vmx_set_nmi_mask(&vmx->vcpu, false);
6129 case INTR_TYPE_SOFT_EXCEPTION:
6130 vmx->vcpu.arch.event_exit_inst_len =
6131 vmcs_read32(instr_len_field);
6133 case INTR_TYPE_HARD_EXCEPTION:
6134 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
6135 u32 err = vmcs_read32(error_code_field);
6140 case INTR_TYPE_SOFT_INTR:
6141 vmx->vcpu.arch.event_exit_inst_len =
6142 vmcs_read32(instr_len_field);
6144 case INTR_TYPE_EXT_INTR:
6145 kvm_queue_interrupt(&vmx->vcpu, vector,
6146 type == INTR_TYPE_SOFT_INTR);
6153 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
6155 if (is_guest_mode(&vmx->vcpu))
6158 VM_EXIT_INSTRUCTION_LEN,
6159 IDT_VECTORING_ERROR_CODE);
6162 static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
6164 if (is_guest_mode(vcpu))
6166 __vmx_complete_interrupts(to_vmx(vcpu),
6167 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
6168 VM_ENTRY_INSTRUCTION_LEN,
6169 VM_ENTRY_EXCEPTION_ERROR_CODE);
6171 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
6174 static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
6177 struct perf_guest_switch_msr *msrs;
6184 for (i = 0; i < nr_msrs; i++)
6185 if (msrs[i].host == msrs[i].guest)
6186 clear_atomic_switch_msr(vmx, msrs[i].msr);
6188 add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
6194 struct vcpu_vmx *vmx = to_vmx(vcpu);
6195 unsigned long debugctlmsr;
6197 if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
6198 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6200 VECTORING_INFO_VALID_MASK) {
6201 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
6203 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
6206 VECTORING_INFO_DELIVER_CODE_MASK)
6207 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
6232 vmx_set_interrupt_shadow(vcpu, 0);
6234 atomic_switch_perf_msrs(vmx);
6235 debugctlmsr = get_debugctlmsr();
6243 "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
6245 "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
6246 __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
6249 "mov %c[cr2](%0), %%" _ASM_AX " \n\t"
6250 "mov %%cr2, %%" _ASM_DX " \n\t"
6253 "mov %%" _ASM_AX", %%cr2 \n\t"
6256 "cmpl $0, %c[launched](%0) \n\t"
6258 "mov %c[rax](%0), %%" _ASM_AX " \n\t"
6259 "mov %c[rbx](%0), %%" _ASM_BX " \n\t"
6260 "mov %c[rdx](%0), %%" _ASM_DX " \n\t"
6261 "mov %c[rsi](%0), %%" _ASM_SI " \n\t"
6262 "mov %c[rdi](%0), %%" _ASM_DI " \n\t"
6263 "mov %c[rbp](%0), %%" _ASM_BP " \n\t"
6264 #ifdef CONFIG_X86_64
6265 "mov %c[r8](%0), %%r8 \n\t"
6266 "mov %c[r9](%0), %%r9 \n\t"
6267 "mov %c[r10](%0), %%r10 \n\t"
6268 "mov %c[r11](%0), %%r11 \n\t"
6269 "mov %c[r12](%0), %%r12 \n\t"
6270 "mov %c[r13](%0), %%r13 \n\t"
6271 "mov %c[r14](%0), %%r14 \n\t"
6272 "mov %c[r15](%0), %%r15 \n\t"
6274 "mov %c[rcx](%0), %%" _ASM_CX " \n\t"
6278 __ex(ASM_VMX_VMLAUNCH) "\n\t"
6280 "1: " __ex(ASM_VMX_VMRESUME) "\n\t"
6283 "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
6285 "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
6286 "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
6288 "mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
6289 "mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
6290 "mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
6291 "mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
6292 #ifdef CONFIG_X86_64
6293 "mov %%r8, %c[r8](%0) \n\t"
6294 "mov %%r9, %c[r9](%0) \n\t"
6295 "mov %%r10, %c[r10](%0) \n\t"
6296 "mov %%r11, %c[r11](%0) \n\t"
6297 "mov %%r12, %c[r12](%0) \n\t"
6298 "mov %%r13, %c[r13](%0) \n\t"
6299 "mov %%r14, %c[r14](%0) \n\t"
6300 "mov %%r15, %c[r15](%0) \n\t"
6302 "mov %%cr2, %%" _ASM_AX " \n\t"
6303 "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
6306 "setbe %c[fail](%0) \n\t"
6307 ".pushsection .rodata \n\t"
6308 ".global vmx_return \n\t"
6311 : : "c"(vmx), "d"((unsigned long)HOST_RSP),
6322 #ifdef CONFIG_X86_64
6333 [wordsize]"i"(sizeof(ulong))
6335 #ifdef CONFIG_X86_64
6336 , "rax", "rbx", "rdi", "rsi"
6337 , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
6339 , "eax", "ebx", "edi", "esi"
6345 update_debugctlmsr(debugctlmsr);
6347 #ifndef CONFIG_X86_64
6366 vcpu->arch.regs_dirty = 0;
6370 if (is_guest_mode(vcpu)) {
6371 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6375 vmcs_read32(IDT_VECTORING_ERROR_CODE);
6377 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
6386 vmx_complete_atomic_exit(vmx);
6387 vmx_recover_nmi_blocking(vmx);
6388 vmx_complete_interrupts(vmx);
6391 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
6393 struct vcpu_vmx *vmx = to_vmx(vcpu);
6403 static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
6435 vmx_vcpu_load(&vmx->vcpu, cpu);
6437 err = vmx_vcpu_setup(vmx);
6438 vmx_vcpu_put(&vmx->vcpu);
6442 if (vm_need_virtualize_apic_accesses(kvm))
6443 err = alloc_apic_access_page(kvm);
6448 if (!kvm->arch.ept_identity_map_addr)
6449 kvm->arch.ept_identity_map_addr =
6450 VMX_EPT_IDENTITY_PAGETABLE_ADDR;
6452 if (alloc_identity_pagetable(kvm) != 0)
6454 if (!init_rmode_identity_map(kvm))
6458 vmx->nested.current_vmptr = -1ull;
6472 return ERR_PTR(err);
6475 static void __init vmx_check_processor_compat(void *rtn)
6477 struct vmcs_config vmcs_conf;
6480 if (setup_vmcs_config(&vmcs_conf) < 0)
6482 if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
6489 static int get_ept_level(void)
6491 return VMX_EPT_DEFAULT_GAW + 1;
6511 else if (vcpu->kvm->arch.iommu_domain &&
6514 VMX_EPT_MT_EPTE_SHIFT;
6522 static int vmx_get_lpage_level(void)
6524 if (enable_ept && !cpu_has_vmx_ept_1g_page())
6531 static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
6534 struct vcpu_vmx *vmx = to_vmx(vcpu);
6538 if (vmx_rdtscp_supported()) {
6539 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
6540 if (exec_control & SECONDARY_EXEC_RDTSCP) {
6545 exec_control &= ~SECONDARY_EXEC_RDTSCP;
6546 vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
6554 if (vmx_invpcid_supported() &&
6556 guest_cpuid_has_pcid(vcpu)) {
6557 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
6558 exec_control |= SECONDARY_EXEC_ENABLE_INVPCID;
6559 vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
6562 if (cpu_has_secondary_exec_ctrls()) {
6563 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
6564 exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
6565 vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
6588 static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
6590 struct vcpu_vmx *vmx = to_vmx(vcpu);
6631 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
6633 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
6635 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
6637 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
6641 vmcs_writel(GUEST_DR7, vmcs12->guest_dr7);
6643 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
6648 vmcs_write64(VMCS_LINK_POINTER, -1ull);
6650 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
6651 (vmcs_config.pin_based_exec_ctrl |
6674 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
6676 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
6679 if (cpu_has_secondary_exec_ctrls()) {
6680 u32 exec_control = vmx_secondary_exec_control(vmx);
6682 exec_control &= ~SECONDARY_EXEC_RDTSCP;
6684 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6685 if (nested_cpu_has(vmcs12,
6686 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
6689 if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) {
6696 if (vmx->nested.apic_access_page)
6697 nested_release_page(vmx->nested.apic_access_page);
6698 vmx->nested.apic_access_page =
6706 if (!vmx->nested.apic_access_page)
6708 ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6710 vmcs_write64(APIC_ACCESS_ADDR,
6714 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
6724 vmx_set_constant_host_state();
6735 exec_control = vmx_exec_control(vmx);
6736 exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
6737 exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
6738 exec_control &= ~CPU_BASED_TPR_SHADOW;
6744 exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
6745 exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
6746 exec_control |= CPU_BASED_UNCOND_IO_EXITING;
6748 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
6754 update_exception_bitmap(vcpu);
6756 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
6759 vmcs_write32(VM_EXIT_CONTROLS,
6762 (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
6766 else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
6767 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
6770 set_cr4_guest_host_mask(vmx);
6773 vmcs_write64(TSC_OFFSET,
6776 vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
6784 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
6785 vmx_flush_tlb(vcpu);
6795 vmx_set_efer(vcpu, vcpu->arch.efer);
6806 vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
6809 vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
6825 struct vmcs12 *vmcs12;
6826 struct vcpu_vmx *vmx = to_vmx(vcpu);
6828 struct loaded_vmcs *vmcs02;
6830 if (!nested_vmx_check_permission(vcpu) ||
6831 !nested_vmx_check_vmcs12(vcpu))
6834 skip_emulated_instruction(vcpu);
6835 vmcs12 = get_vmcs12(vcpu);
6848 nested_vmx_failValid(vcpu,
6849 launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
6850 : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
6857 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
6861 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
6864 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
6873 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
6878 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high) ||
6880 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) ||
6882 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) ||
6884 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high) ||
6886 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high))
6888 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
6892 if (((vmcs12->host_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
6893 ((vmcs12->host_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
6894 nested_vmx_failValid(vcpu,
6895 VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
6899 if (((vmcs12->guest_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
6900 ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
6901 nested_vmx_entry_failure(vcpu, vmcs12,
6906 nested_vmx_entry_failure(vcpu, vmcs12,
6916 vmcs02 = nested_get_current_vmcs02(vmx);
6920 enter_guest_mode(vcpu);
6922 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
6927 vmx_vcpu_load(vcpu, cpu);
6933 prepare_vmcs02(vcpu, vmcs12);
6961 static inline unsigned long
6962 vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
6965 (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
6968 vcpu->arch.cr0_guest_owned_bits));
6971 static inline unsigned long
6972 vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
6975 (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
6978 vcpu->arch.cr4_guest_owned_bits));
6995 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
6996 vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
7042 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
7044 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
7063 vmcs_read32(IDT_VECTORING_INFO_FIELD);
7065 vmcs_read32(IDT_VECTORING_ERROR_CODE);
7091 vmx_set_efer(vcpu, vcpu->arch.efer);
7107 update_exception_bitmap(vcpu);
7109 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
7115 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
7128 vmx_flush_tlb(vcpu);
7151 vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
7160 static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
7162 struct vcpu_vmx *vmx = to_vmx(vcpu);
7164 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7166 leave_guest_mode(vcpu);
7172 vmx_vcpu_load(vcpu, cpu);
7178 nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
7183 vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
7189 if (vmx->nested.apic_access_page) {
7190 nested_release_page(vmx->nested.apic_access_page);
7191 vmx->nested.apic_access_page = 0;
7201 nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR));
7203 nested_vmx_succeed(vcpu);
7213 static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
7214 struct vmcs12 *vmcs12,
7215 u32 reason, unsigned long qualification)
7220 nested_vmx_succeed(vcpu);
7223 static int vmx_check_intercept(struct kvm_vcpu *vcpu,
7232 .disabled_by_bios = vmx_disabled_by_bios,
7235 .check_processor_compatibility = vmx_check_processor_compat,
7238 .cpu_has_accelerated_tpr = report_flexpriority,
7240 .vcpu_create = vmx_create_vcpu,
7241 .vcpu_free = vmx_free_vcpu,
7242 .vcpu_reset = vmx_vcpu_reset,
7244 .prepare_guest_switch = vmx_save_host_state,
7245 .vcpu_load = vmx_vcpu_load,
7246 .vcpu_put = vmx_vcpu_put,
7248 .update_db_bp_intercept = update_exception_bitmap,
7249 .get_msr = vmx_get_msr,
7250 .set_msr = vmx_set_msr,
7251 .get_segment_base = vmx_get_segment_base,
7252 .get_segment = vmx_get_segment,
7253 .set_segment = vmx_set_segment,
7254 .get_cpl = vmx_get_cpl,
7255 .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
7256 .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
7257 .decache_cr3 = vmx_decache_cr3,
7258 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
7259 .set_cr0 = vmx_set_cr0,
7260 .set_cr3 = vmx_set_cr3,
7261 .set_cr4 = vmx_set_cr4,
7262 .set_efer = vmx_set_efer,
7263 .get_idt = vmx_get_idt,
7264 .set_idt = vmx_set_idt,
7265 .get_gdt = vmx_get_gdt,
7266 .set_gdt = vmx_set_gdt,
7267 .set_dr7 = vmx_set_dr7,
7268 .cache_reg = vmx_cache_reg,
7269 .get_rflags = vmx_get_rflags,
7270 .set_rflags = vmx_set_rflags,
7271 .fpu_activate = vmx_fpu_activate,
7272 .fpu_deactivate = vmx_fpu_deactivate,
7274 .tlb_flush = vmx_flush_tlb,
7276 .run = vmx_vcpu_run,
7277 .handle_exit = vmx_handle_exit,
7279 .set_interrupt_shadow = vmx_set_interrupt_shadow,
7280 .get_interrupt_shadow = vmx_get_interrupt_shadow,
7281 .patch_hypercall = vmx_patch_hypercall,
7282 .set_irq = vmx_inject_irq,
7283 .set_nmi = vmx_inject_nmi,
7284 .queue_exception = vmx_queue_exception,
7285 .cancel_injection = vmx_cancel_injection,
7286 .interrupt_allowed = vmx_interrupt_allowed,
7287 .nmi_allowed = vmx_nmi_allowed,
7288 .get_nmi_mask = vmx_get_nmi_mask,
7289 .set_nmi_mask = vmx_set_nmi_mask,
7294 .set_tss_addr = vmx_set_tss_addr,
7295 .get_tdp_level = get_ept_level,
7296 .get_mt_mask = vmx_get_mt_mask,
7298 .get_exit_info = vmx_get_exit_info,
7300 .get_lpage_level = vmx_get_lpage_level,
7302 .cpuid_update = vmx_cpuid_update,
7304 .rdtscp_supported = vmx_rdtscp_supported,
7305 .invpcid_supported = vmx_invpcid_supported,
7307 .set_supported_cpuid = vmx_set_supported_cpuid,
7309 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
7311 .set_tsc_khz = vmx_set_tsc_khz,
7312 .write_tsc_offset = vmx_write_tsc_offset,
7313 .adjust_tsc_offset = vmx_adjust_tsc_offset,
7314 .compute_tsc_offset = vmx_compute_tsc_offset,
7317 .set_tdp_cr3 = vmx_set_cr3,
7319 .check_intercept = vmx_check_intercept,
7322 static int __init vmx_init(void)
7332 if (!vmx_io_bitmap_a)
7338 if (!vmx_io_bitmap_b)
7342 if (!vmx_msr_bitmap_legacy)
7347 if (!vmx_msr_bitmap_longmode)
7370 vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
7371 vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
7379 (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
7380 (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
7381 0ull, VMX_EPT_EXECUTABLE_MASK);
7382 ept_set_mmio_spte_mask();
7390 free_page((unsigned long)vmx_msr_bitmap_longmode);
7392 free_page((unsigned long)vmx_msr_bitmap_legacy);
7394 free_page((unsigned long)vmx_io_bitmap_b);
7396 free_page((unsigned long)vmx_io_bitmap_a);
7400 static void __exit vmx_exit(void)
7402 free_page((unsigned long)vmx_msr_bitmap_legacy);
7403 free_page((unsigned long)vmx_msr_bitmap_longmode);
7404 free_page((unsigned long)vmx_io_bitmap_b);
7405 free_page((unsigned long)vmx_io_bitmap_a);