kvm: better MWAIT emulation for guests
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 283aa8601833509b9cf792b919dd3f243a78f389..a4ef6371810106bef15aa649badef4ff9d8ac18d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,9 +84,6 @@ module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
 static bool __read_mostly emulate_invalid_guest_state = true;
 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 
-static bool __read_mostly vmm_exclusive = 1;
-module_param(vmm_exclusive, bool, S_IRUGO);
-
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
@@ -615,10 +612,6 @@ struct vcpu_vmx {
        int vpid;
        bool emulation_required;
 
-       /* Support for vnmi-less CPUs */
-       int soft_vnmi_blocked;
-       ktime_t entry_time;
-       s64 vnmi_blocked_time;
        u32 exit_reason;
 
        /* Posted interrupt descriptor */
@@ -914,8 +907,6 @@ static void nested_release_page_clean(struct page *page)
 
 static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
 static u64 construct_eptp(unsigned long root_hpa);
-static void kvm_cpu_vmxon(u64 addr);
-static void kvm_cpu_vmxoff(void);
 static bool vmx_xsaves_supported(void);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@ -1239,6 +1230,11 @@ static inline bool cpu_has_vmx_invvpid_global(void)
        return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
 }
 
+static inline bool cpu_has_vmx_invvpid(void)
+{
+       return vmx_capability.vpid & VMX_VPID_INVVPID_BIT;
+}
+
 static inline bool cpu_has_vmx_ept(void)
 {
        return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -1285,11 +1281,6 @@ static inline bool cpu_has_vmx_invpcid(void)
                SECONDARY_EXEC_ENABLE_INVPCID;
 }
 
-static inline bool cpu_has_virtual_nmis(void)
-{
-       return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
-}
-
 static inline bool cpu_has_vmx_wbinvd_exit(void)
 {
        return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -2235,15 +2226,10 @@ static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
 static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
        bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
 
-       if (!vmm_exclusive)
-               kvm_cpu_vmxon(phys_addr);
-       else if (!already_loaded)
-               loaded_vmcs_clear(vmx->loaded_vmcs);
-
        if (!already_loaded) {
+               loaded_vmcs_clear(vmx->loaded_vmcs);
                local_irq_disable();
                crash_disable_local_vmclear(cpu);
 
@@ -2321,11 +2307,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
        vmx_vcpu_pi_put(vcpu);
 
        __vmx_load_host_state(to_vmx(vcpu));
-       if (!vmm_exclusive) {
-               __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
-               vcpu->cpu = -1;
-               kvm_cpu_vmxoff();
-       }
 }
 
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
@@ -2749,11 +2730,11 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
                vmx->nested.nested_vmx_secondary_ctls_high);
        vmx->nested.nested_vmx_secondary_ctls_low = 0;
        vmx->nested.nested_vmx_secondary_ctls_high &=
+               SECONDARY_EXEC_RDRAND | SECONDARY_EXEC_RDSEED |
                SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
                SECONDARY_EXEC_RDTSCP |
                SECONDARY_EXEC_DESC |
                SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
-               SECONDARY_EXEC_ENABLE_VPID |
                SECONDARY_EXEC_APIC_REGISTER_VIRT |
                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                SECONDARY_EXEC_WBINVD_EXITING |
@@ -2764,14 +2745,16 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
                vmx->nested.nested_vmx_secondary_ctls_high |=
                        SECONDARY_EXEC_ENABLE_EPT;
                vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
-                        VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
-                        VMX_EPT_INVEPT_BIT;
+                        VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
                if (cpu_has_vmx_ept_execute_only())
                        vmx->nested.nested_vmx_ept_caps |=
                                VMX_EPT_EXECUTE_ONLY_BIT;
                vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
                vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
-                       VMX_EPT_EXTENT_CONTEXT_BIT;
+                       VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
+                       VMX_EPT_1GB_PAGE_BIT;
+               if (enable_ept_ad_bits)
+                       vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
        } else
                vmx->nested.nested_vmx_ept_caps = 0;
 
@@ -2781,10 +2764,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
         * though it is treated as global context.  The alternative is
         * not failing the single-context invvpid, and it is worse.
         */
-       if (enable_vpid)
+       if (enable_vpid) {
+               vmx->nested.nested_vmx_secondary_ctls_high |=
+                       SECONDARY_EXEC_ENABLE_VPID;
                vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
                        VMX_VPID_EXTENT_SUPPORTED_MASK;
-       else
+       } else
                vmx->nested.nested_vmx_vpid_caps = 0;
 
        if (enable_unrestricted_guest)
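
L1 is now offered SECONDARY_EXEC_ENABLE_VPID only when the host itself runs
with VPIDs enabled, and the INVVPID capabilities it sees come from a fixed
mask rather than raw hardware. For reference, that mask covers the four
INVVPID types; this is the arch/x86/include/asm/vmx.h definition of this era
as best recalled here, shown for context only:

    #define VMX_VPID_EXTENT_SUPPORTED_MASK              \
            (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT |      \
            VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |        \
            VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT |        \
            VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)
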
@@ -3416,6 +3401,7 @@ static __init int vmx_disabled_by_bios(void)
 
 static void kvm_cpu_vmxon(u64 addr)
 {
+       cr4_set_bits(X86_CR4_VMXE);
        intel_pt_handle_vmx(1);
 
        asm volatile (ASM_VMX_VMXON_RAX
@@ -3458,12 +3444,8 @@ static int hardware_enable(void)
                /* enable and lock */
                wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
        }
-       cr4_set_bits(X86_CR4_VMXE);
-
-       if (vmm_exclusive) {
-               kvm_cpu_vmxon(phys_addr);
-               ept_sync_global();
-       }
+       kvm_cpu_vmxon(phys_addr);
+       ept_sync_global();
 
        native_store_gdt(this_cpu_ptr(&host_gdt));
 
@@ -3489,15 +3471,13 @@ static void kvm_cpu_vmxoff(void)
        asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
 
        intel_pt_handle_vmx(0);
+       cr4_clear_bits(X86_CR4_VMXE);
 }
 
 static void hardware_disable(void)
 {
-       if (vmm_exclusive) {
-               vmclear_local_loaded_vmcss();
-               kvm_cpu_vmxoff();
-       }
-       cr4_clear_bits(X86_CR4_VMXE);
+       vmclear_local_loaded_vmcss();
+       kvm_cpu_vmxoff();
 }
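
With vmm_exclusive gone, CR4.VMXE is managed by the VMXON/VMXOFF helpers
themselves, so the flag can no longer drift out of sync with the actual VMX
operation state. As the pair reads after this patch (operand lists reproduced
from the surrounding file; comments added here for explanation):

    static void kvm_cpu_vmxon(u64 addr)
    {
            cr4_set_bits(X86_CR4_VMXE);   /* VMXON #UDs if CR4.VMXE is clear */
            intel_pt_handle_vmx(1);

            asm volatile (ASM_VMX_VMXON_RAX
                            : : "a"(&addr), "m"(addr)
                            : "memory", "cc");
    }

    static void kvm_cpu_vmxoff(void)
    {
            asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");

            intel_pt_handle_vmx(0);
            cr4_clear_bits(X86_CR4_VMXE); /* mirror image of kvm_cpu_vmxon() */
    }
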
 
 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -3547,11 +3527,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
              CPU_BASED_USE_IO_BITMAPS |
              CPU_BASED_MOV_DR_EXITING |
              CPU_BASED_USE_TSC_OFFSETING |
-             CPU_BASED_MWAIT_EXITING |
-             CPU_BASED_MONITOR_EXITING |
              CPU_BASED_INVLPG_EXITING |
              CPU_BASED_RDPMC_EXITING;
 
+       if (!kvm_mwait_in_guest())
+               min |= CPU_BASED_MWAIT_EXITING |
+                       CPU_BASED_MONITOR_EXITING;
+
        opt = CPU_BASED_TPR_SHADOW |
              CPU_BASED_USE_MSR_BITMAPS |
              CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
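
This is the headline change: MWAIT/MONITOR exiting drops out of the required
controls when kvm_mwait_in_guest() says native execution is safe, so such
guests can idle with MWAIT instead of trapping. The helper itself lands in
arch/x86/kvm/x86.h in the companion patch; a hedged sketch of the kind of
check it performs (details may differ from the real helper):

    #include <asm/mwait.h>  /* CPUID_MWAIT_LEAF, CPUID5_ECX_INTERRUPT_BREAK */

    static inline bool kvm_mwait_in_guest(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!boot_cpu_has(X86_FEATURE_MWAIT) ||
                boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
                    return false;

            /*
             * Without the interrupt-break-event extension, a guest that
             * disables interrupts and executes MWAIT could park the CPU
             * for good, so keep exiting enabled in that case.
             */
            cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
            return ecx & CPUID5_ECX_INTERRUPT_BREAK;
    }
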
@@ -3617,9 +3599,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                                &_vmexit_control) < 0)
                return -EIO;
 
-       min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
-       opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
-                PIN_BASED_VMX_PREEMPTION_TIMER;
+       min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
+               PIN_BASED_VIRTUAL_NMIS;
+       opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
        if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
                                &_pin_based_exec_control) < 0)
                return -EIO;
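
PIN_BASED_VIRTUAL_NMIS moves from opt to min, making virtual NMIs a hard
requirement: adjust_vmx_controls() silently drops opt bits the CPU lacks but
fails setup (the -EIO above) if a min bit cannot be enabled, which is what
lets all the soft-vnmi fallback code go away below. A minimal userspace
sketch of that reconciliation logic (illustrative names, not the kernel API):

    #include <stdint.h>

    /*
     * A MSR_IA32_VMX_*_CTLS value encodes the allowed settings of a VMX
     * control field: the low 32 bits are bits that must be 1, the high
     * 32 bits are bits that may be 1.
     */
    static int adjust_controls(uint32_t min, uint32_t opt, uint64_t msr,
                               uint32_t *result)
    {
            uint32_t must_be_one = (uint32_t)msr;
            uint32_t may_be_one  = (uint32_t)(msr >> 32);
            uint32_t ctl = min | opt;

            ctl &= may_be_one;      /* optional bits the CPU lacks fall away */
            ctl |= must_be_one;     /* reserved-1 bits are forced on */

            if ((ctl & min) != min) /* a required bit is unsupported */
                    return -1;

            *result = ctl;
            return 0;
    }
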
@@ -4011,11 +3993,12 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 
 static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
 {
-       vpid_sync_context(vpid);
        if (enable_ept) {
                if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
                        return;
                ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
+       } else {
+               vpid_sync_context(vpid);
        }
 }
 
@@ -4024,6 +4007,12 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
        __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
 }
 
+static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu)
+{
+       if (enable_ept)
+               vmx_flush_tlb(vcpu);
+}
+
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
 {
        ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
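
The flush is now either/or: with EPT enabled, INVEPT on the current root
already invalidates the guest-physical and combined mappings a VPID flush
would target, so issuing both was redundant. vmx_flush_tlb_ept_only() serves
the APIC-access-page paths later in this patch, where only EPT mappings can
go stale. The reworked helper, with the reasoning written out as comments
added here:

    static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
    {
            if (enable_ept) {
                    /*
                     * With EPT, translations are tagged by the EPT root;
                     * INVEPT flushes guest-physical and combined mappings
                     * for it, so no extra VPID flush is needed.
                     */
                    if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
                            return;
                    ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
            } else {
                    /* Without EPT, linear mappings are tagged by VPID. */
                    vpid_sync_context(vpid);
            }
    }
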
@@ -5285,8 +5274,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
        vmx->rmode.vm86_active = 0;
 
-       vmx->soft_vnmi_blocked = 0;
-
        vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
        kvm_set_cr8(vcpu, 0);
 
@@ -5406,8 +5393,7 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
 
 static void enable_nmi_window(struct kvm_vcpu *vcpu)
 {
-       if (!cpu_has_virtual_nmis() ||
-           vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+       if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
                enable_irq_window(vcpu);
                return;
        }
@@ -5448,19 +5434,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
        if (!is_guest_mode(vcpu)) {
-               if (!cpu_has_virtual_nmis()) {
-                       /*
-                        * Tracking the NMI-blocked state in software is built upon
-                        * finding the next open IRQ window. This, in turn, depends on
-                        * well-behaving guests: They have to keep IRQs disabled at
-                        * least as long as the NMI handler runs. Otherwise we may
-                        * cause NMI nesting, maybe breaking the guest. But as this is
-                        * highly unlikely, we can live with the residual risk.
-                        */
-                       vmx->soft_vnmi_blocked = 1;
-                       vmx->vnmi_blocked_time = 0;
-               }
-
                ++vcpu->stat.nmi_injections;
                vmx->nmi_known_unmasked = false;
        }
@@ -5477,8 +5450,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 
 static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
 {
-       if (!cpu_has_virtual_nmis())
-               return to_vmx(vcpu)->soft_vnmi_blocked;
        if (to_vmx(vcpu)->nmi_known_unmasked)
                return false;
        return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
@@ -5488,20 +5459,13 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-       if (!cpu_has_virtual_nmis()) {
-               if (vmx->soft_vnmi_blocked != masked) {
-                       vmx->soft_vnmi_blocked = masked;
-                       vmx->vnmi_blocked_time = 0;
-               }
-       } else {
-               vmx->nmi_known_unmasked = !masked;
-               if (masked)
-                       vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
-                                     GUEST_INTR_STATE_NMI);
-               else
-                       vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-                                       GUEST_INTR_STATE_NMI);
-       }
+       vmx->nmi_known_unmasked = !masked;
+       if (masked)
+               vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+                             GUEST_INTR_STATE_NMI);
+       else
+               vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+                               GUEST_INTR_STATE_NMI);
 }
 
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -5509,9 +5473,6 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
        if (to_vmx(vcpu)->nested.nested_run_pending)
                return 0;
 
-       if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
-               return 0;
-
        return  !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
                  (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
                   | GUEST_INTR_STATE_NMI));
@@ -6232,21 +6193,18 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
        unsigned long exit_qualification;
        gpa_t gpa;
        u32 error_code;
-       int gla_validity;
 
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
-       gla_validity = (exit_qualification >> 7) & 0x3;
-       if (gla_validity == 0x2) {
-               printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
-               printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
-                       (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
-                       vmcs_readl(GUEST_LINEAR_ADDRESS));
-               printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
-                       (long unsigned int)exit_qualification);
-               vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
-               vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION;
-               return 0;
+       if (is_guest_mode(vcpu)
+           && !(exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)) {
+               /*
+                * Fix up exit_qualification according to whether guest
+                * page table accesses are reads or writes.
+                */
+               u64 eptp = nested_ept_get_cr3(vcpu);
+               if (!(eptp & VMX_EPT_AD_ENABLE_BIT))
+                       exit_qualification &= ~EPT_VIOLATION_ACC_WRITE;
        }
 
        /*
@@ -6256,7 +6214,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
         * AAK134, BY25.
         */
        if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
-                       cpu_has_virtual_nmis() &&
                        (exit_qualification & INTR_INFO_UNBLOCK_NMI))
                vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
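
On the fixup above: when the L1 EPTP runs without accessed/dirty flags, a
nested guest's page walk only ever reads the guest paging structures, so a
write report on a not-yet-translated access is downgraded to a read before
the violation is forwarded. The exit-qualification bits involved, per the
SDM's "Exit Qualification for EPT Violations" (values shown for reference):

    #define EPT_VIOLATION_ACC_READ       (1 << 0) /* access was a data read */
    #define EPT_VIOLATION_ACC_WRITE      (1 << 1) /* access was a data write */
    #define EPT_VIOLATION_ACC_INSTR      (1 << 2) /* access was an ifetch */
    #define EPT_VIOLATION_GVA_TRANSLATED (1 << 8) /* violation hit the final
                                                     translation, not a guest
                                                     paging-structure entry */
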
 
@@ -6517,8 +6474,10 @@ static __init int hardware_setup(void)
        if (boot_cpu_has(X86_FEATURE_NX))
                kvm_enable_efer_bits(EFER_NX);
 
-       if (!cpu_has_vmx_vpid())
+       if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
+               !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
                enable_vpid = 0;
+
        if (!cpu_has_vmx_shadow_vmcs())
                enable_shadow_vmcs = 0;
        if (enable_shadow_vmcs)
@@ -7258,9 +7217,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
 static int handle_vmclear(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       u32 zero = 0;
        gpa_t vmptr;
-       struct vmcs12 *vmcs12;
-       struct page *page;
 
        if (!nested_vmx_check_permission(vcpu))
                return 1;
@@ -7271,22 +7229,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
        if (vmptr == vmx->nested.current_vmptr)
                nested_release_vmcs12(vmx);
 
-       page = nested_get_page(vcpu, vmptr);
-       if (page == NULL) {
-               /*
-                * For accurate processor emulation, VMCLEAR beyond available
-                * physical memory should do nothing at all. However, it is
-                * possible that a nested vmx bug, not a guest hypervisor bug,
-                * resulted in this case, so let's shut down before doing any
-                * more damage:
-                */
-               kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
-               return 1;
-       }
-       vmcs12 = kmap(page);
-       vmcs12->launch_state = 0;
-       kunmap(page);
-       nested_release_page(page);
+       kvm_vcpu_write_guest(vcpu,
+                       vmptr + offsetof(struct vmcs12, launch_state),
+                       &zero, sizeof(zero));
 
        nested_free_vmcs02(vmx, vmptr);
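
Clearing launch_state is now a single write into guest memory instead of a
pin/kmap/kunmap dance. The helper is the generic accessor from
virt/kvm/kvm_main.c; roughly:

    /*
     * Copies len bytes from data into the guest physical address space,
     * resolving gpa through the memslots; returns 0 on success and a
     * negative error if the range is unbacked.  The return value is
     * deliberately ignored above: per the deleted comment, VMCLEAR of an
     * address beyond available memory should do nothing at all, and that
     * now falls out naturally instead of raising a triple fault.
     */
    int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa,
                             const void *data, unsigned long len);
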
 
@@ -7819,7 +7764,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
         * "blocked by NMI" bit has to be set before next VM entry.
         */
        if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
-                       cpu_has_virtual_nmis() &&
                        (exit_qualification & INTR_INFO_UNBLOCK_NMI))
                vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
                                GUEST_INTR_STATE_NMI);
@@ -8121,6 +8065,10 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
        case EXIT_REASON_RDPMC:
                return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
+       case EXIT_REASON_RDRAND:
+               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND);
+       case EXIT_REASON_RDSEED:
+               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED);
        case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
                return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
        case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
@@ -8491,31 +8439,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
                return 0;
        }
 
-       if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
-           !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
-                                       get_vmcs12(vcpu))))) {
-               if (vmx_interrupt_allowed(vcpu)) {
-                       vmx->soft_vnmi_blocked = 0;
-               } else if (vmx->vnmi_blocked_time > 1000000000LL &&
-                          vcpu->arch.nmi_pending) {
-                       /*
-                        * This CPU don't support us in finding the end of an
-                        * NMI-blocked window if the guest runs with IRQs
-                        * disabled. So we pull the trigger after 1 s of
-                        * futile waiting, but inform the user about this.
-                        */
-                       printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
-                              "state on VCPU %d after 1 s timeout\n",
-                              __func__, vcpu->vcpu_id);
-                       vmx->soft_vnmi_blocked = 0;
-               }
-       }
-
        if (exit_reason < kvm_vmx_max_exit_handlers
            && kvm_vmx_exit_handlers[exit_reason])
                return kvm_vmx_exit_handlers[exit_reason](vcpu);
        else {
-               WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+               vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
+                               exit_reason);
                kvm_queue_exception(vcpu, UD_VECTOR);
                return 1;
        }
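
An unexpected exit reason is guest-triggerable (a buggy or malicious guest,
or a nested setup this version cannot fully emulate), so it no longer trips
WARN_ONCE, which would taint the kernel and panic under panic_on_warn.
vcpu_unimpl() is the ratelimited per-vcpu logging macro from
include/linux/kvm_host.h; roughly:

    #define vcpu_unimpl(vcpu, fmt, ...)                                 \
            kvm_pr_unimpl("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
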
@@ -8561,6 +8490,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
        } else {
                sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
                sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+               vmx_flush_tlb_ept_only(vcpu);
        }
        vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
 
@@ -8586,8 +8516,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
         */
        if (!is_guest_mode(vcpu) ||
            !nested_cpu_has2(get_vmcs12(&vmx->vcpu),
-                            SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+                            SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
                vmcs_write64(APIC_ACCESS_ADDR, hpa);
+               vmx_flush_tlb_ept_only(vcpu);
+       }
 }
 
 static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
@@ -8782,37 +8714,33 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 
        idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 
-       if (cpu_has_virtual_nmis()) {
-               if (vmx->nmi_known_unmasked)
-                       return;
-               /*
-                * Can't use vmx->exit_intr_info since we're not sure what
-                * the exit reason is.
-                */
-               exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-               unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
-               vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
-               /*
-                * SDM 3: 27.7.1.2 (September 2008)
-                * Re-set bit "block by NMI" before VM entry if vmexit caused by
-                * a guest IRET fault.
-                * SDM 3: 23.2.2 (September 2008)
-                * Bit 12 is undefined in any of the following cases:
-                *  If the VM exit sets the valid bit in the IDT-vectoring
-                *   information field.
-                *  If the VM exit is due to a double fault.
-                */
-               if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
-                   vector != DF_VECTOR && !idtv_info_valid)
-                       vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
-                                     GUEST_INTR_STATE_NMI);
-               else
-                       vmx->nmi_known_unmasked =
-                               !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
-                                 & GUEST_INTR_STATE_NMI);
-       } else if (unlikely(vmx->soft_vnmi_blocked))
-               vmx->vnmi_blocked_time +=
-                       ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
+       if (vmx->nmi_known_unmasked)
+               return;
+       /*
+        * Can't use vmx->exit_intr_info since we're not sure what
+        * the exit reason is.
+        */
+       exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+       unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
+       vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+       /*
+        * SDM 3: 27.7.1.2 (September 2008)
+        * Re-set bit "block by NMI" before VM entry if vmexit caused by
+        * a guest IRET fault.
+        * SDM 3: 23.2.2 (September 2008)
+        * Bit 12 is undefined in any of the following cases:
+        *  If the VM exit sets the valid bit in the IDT-vectoring
+        *   information field.
+        *  If the VM exit is due to a double fault.
+        */
+       if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+           vector != DF_VECTOR && !idtv_info_valid)
+               vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+                             GUEST_INTR_STATE_NMI);
+       else
+               vmx->nmi_known_unmasked =
+                       !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+                         & GUEST_INTR_STATE_NMI);
 }
 
 static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
@@ -8929,10 +8857,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long debugctlmsr, cr4;
 
-       /* Record the guest's net vcpu time for enforced NMI injections. */
-       if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
-               vmx->entry_time = ktime_get();
-
        /* Don't enter VMX if guest state is invalid, let the exit handler
           start emulation until we arrive back to a valid state */
        if (vmx->emulation_required)
@@ -9140,16 +9064,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        vmx_complete_interrupts(vmx);
 }
 
-static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
+static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        int cpu;
 
-       if (vmx->loaded_vmcs == &vmx->vmcs01)
+       if (vmx->loaded_vmcs == vmcs)
                return;
 
        cpu = get_cpu();
-       vmx->loaded_vmcs = &vmx->vmcs01;
+       vmx->loaded_vmcs = vmcs;
        vmx_vcpu_put(vcpu);
        vmx_vcpu_load(vcpu, cpu);
        vcpu->cpu = cpu;
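
vmx_switch_vmcs() generalizes the old vmcs01-only reload so the same
put/load sequence covers both directions of a nested transition; later hunks
in this patch use it as, e.g.:

    vmx_switch_vmcs(vcpu, vmcs02);         /* enter_vmx_non_root_mode() */
    vmx_switch_vmcs(vcpu, &vmx->vmcs01);   /* nested vmexit and teardown */
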
@@ -9167,7 +9091,7 @@ static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
 
        r = vcpu_load(vcpu);
        BUG_ON(r);
-       vmx_load_vmcs01(vcpu);
+       vmx_switch_vmcs(vcpu, &vmx->vmcs01);
        free_nested(vmx);
        vcpu_put(vcpu);
 }
@@ -9228,11 +9152,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
        vmx->loaded_vmcs->shadow_vmcs = NULL;
        if (!vmx->loaded_vmcs->vmcs)
                goto free_msrs;
-       if (!vmm_exclusive)
-               kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
        loaded_vmcs_init(vmx->loaded_vmcs);
-       if (!vmm_exclusive)
-               kvm_cpu_vmxoff();
 
        cpu = get_cpu();
        vmx_vcpu_load(&vmx->vcpu, cpu);
@@ -9492,17 +9412,26 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
        return get_vmcs12(vcpu)->ept_pointer;
 }
 
-static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
+static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 {
+       u64 eptp;
+
        WARN_ON(mmu_is_nested(vcpu));
+       eptp = nested_ept_get_cr3(vcpu);
+       if ((eptp & VMX_EPT_AD_ENABLE_BIT) && !enable_ept_ad_bits)
+               return 1;
+
+       kvm_mmu_unload(vcpu);
        kvm_init_shadow_ept_mmu(vcpu,
                        to_vmx(vcpu)->nested.nested_vmx_ept_caps &
-                       VMX_EPT_EXECUTE_ONLY_BIT);
+                       VMX_EPT_EXECUTE_ONLY_BIT,
+                       eptp & VMX_EPT_AD_ENABLE_BIT);
        vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
        vcpu->arch.mmu.get_cr3           = nested_ept_get_cr3;
        vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
 
        vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
+       return 0;
 }
 
 static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
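
nested_ept_init_mmu_context() now reads the L1 EPTP itself and returns
failure when L1 enables accessed/dirty flags that the host has disabled;
prepare_vmcs02() below turns that into an ENTRY_FAIL_DEFAULT entry failure.
For reference, the EPTP fields consulted (SDM Vol. 3C, extended-page-table
pointer layout), with an illustrative helper that is not part of the patch:

    /*
     *   bits 2:0    EPT paging-structure memory type (6 = write-back)
     *   bits 5:3    EPT page-walk length minus one (3 = 4-level walk)
     *   bit  6      enable accessed/dirty flags (VMX_EPT_AD_ENABLE_BIT)
     *   bits 51:12  physical address of the EPT PML4 table
     */
    static inline bool nested_ept_ad_enabled(u64 eptp) /* hypothetical name */
    {
            return !!(eptp & VMX_EPT_AD_ENABLE_BIT);
    }
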
@@ -9694,10 +9623,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
                return false;
 
        page = nested_get_page(vcpu, vmcs12->msr_bitmap);
-       if (!page) {
-               WARN_ON(1);
+       if (!page)
                return false;
-       }
        msr_bitmap_l1 = (unsigned long *)kmap(page);
 
        memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
@@ -9990,7 +9917,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 exec_control;
-       bool nested_ept_enabled = false;
 
        vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
        vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -10137,8 +10063,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
                                vmcs12->guest_intr_status);
                }
 
-               nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0;
-
                /*
                 * Write an illegal value to APIC_ACCESS_ADDR. Later,
                 * nested_get_vmcs12_pages will either fix it up or
@@ -10269,8 +10193,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
        }
 
        if (nested_cpu_has_ept(vmcs12)) {
-               kvm_mmu_unload(vcpu);
-               nested_ept_init_mmu_context(vcpu);
+               if (nested_ept_init_mmu_context(vcpu)) {
+                       *entry_failure_code = ENTRY_FAIL_DEFAULT;
+                       return 1;
+               }
+       } else if (nested_cpu_has2(vmcs12,
+                                  SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+               vmx_flush_tlb_ept_only(vcpu);
        }
 
        /*
@@ -10298,12 +10227,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
        vmx_set_efer(vcpu, vcpu->arch.efer);
 
        /* Shadow page tables on either EPT or shadow page tables. */
-       if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled,
+       if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
                                entry_failure_code))
                return 1;
 
-       kvm_mmu_reset_context(vcpu);
-
        if (!enable_ept)
                vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
 
@@ -10423,7 +10350,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
        struct loaded_vmcs *vmcs02;
-       int cpu;
        u32 msr_entry_idx;
        u32 exit_qual;
 
@@ -10436,18 +10362,12 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
        if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
                vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
 
-       cpu = get_cpu();
-       vmx->loaded_vmcs = vmcs02;
-       vmx_vcpu_put(vcpu);
-       vmx_vcpu_load(vcpu, cpu);
-       vcpu->cpu = cpu;
-       put_cpu();
-
+       vmx_switch_vmcs(vcpu, vmcs02);
        vmx_segment_cache_clear(vmx);
 
        if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
                leave_guest_mode(vcpu);
-               vmx_load_vmcs01(vcpu);
+               vmx_switch_vmcs(vcpu, &vmx->vmcs01);
                nested_vmx_entry_failure(vcpu, vmcs12,
                                         EXIT_REASON_INVALID_STATE, exit_qual);
                return 1;
@@ -10460,7 +10380,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
                                            vmcs12->vm_entry_msr_load_count);
        if (msr_entry_idx) {
                leave_guest_mode(vcpu);
-               vmx_load_vmcs01(vcpu);
+               vmx_switch_vmcs(vcpu, &vmx->vmcs01);
                nested_vmx_entry_failure(vcpu, vmcs12,
                                EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx);
                return 1;
@@ -11028,7 +10948,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
        if (unlikely(vmx->fail))
                vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR);
 
-       vmx_load_vmcs01(vcpu);
+       vmx_switch_vmcs(vcpu, &vmx->vmcs01);
 
        if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
            && nested_exit_intr_ack_set(vcpu)) {
@@ -11072,6 +10992,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
                vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
                vmx_set_virtual_x2apic_mode(vcpu,
                                vcpu->arch.apic_base & X2APIC_ENABLE);
+       } else if (!nested_cpu_has_ept(vmcs12) &&
+                  nested_cpu_has2(vmcs12,
+                                  SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+               vmx_flush_tlb_ept_only(vcpu);
        }
 
        /* This is needed for same reason as it was needed in prepare_vmcs02 */
@@ -11121,8 +11045,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
  */
 static void vmx_leave_nested(struct kvm_vcpu *vcpu)
 {
-       if (is_guest_mode(vcpu))
+       if (is_guest_mode(vcpu)) {
+               to_vmx(vcpu)->nested.nested_run_pending = 0;
                nested_vmx_vmexit(vcpu, -1, 0, 0);
+       }
        free_nested(to_vmx(vcpu));
 }