]> git.karo-electronics.de Git - mv-sheeva.git/blobdiff - arch/x86/kvm/vmx.c
KVM: VMX: Support Unrestricted Guest feature
[mv-sheeva.git] / arch / x86 / kvm / vmx.c
index 29f912927a588bda4e9a3fc06ea2412ec04b5ebc..f0f9773f0b0f9079329edb0057f410945cd2ff06 100644 (file)
@@ -51,6 +51,10 @@ module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
 static int __read_mostly enable_ept = 1;
 module_param_named(ept, enable_ept, bool, S_IRUGO);
 
+static int __read_mostly enable_unrestricted_guest = 1;
+module_param_named(unrestricted_guest,
+                       enable_unrestricted_guest, bool, S_IRUGO);
+
 static int __read_mostly emulate_invalid_guest_state = 0;
 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 
@@ -161,6 +165,8 @@ static struct kvm_vmx_segment_field {
        VMX_SEGMENT_FIELD(LDTR),
 };
 
+static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
+
 /*
  * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it
  * away by decrementing the array size.
@@ -277,6 +283,12 @@ static inline int cpu_has_vmx_ept(void)
                SECONDARY_EXEC_ENABLE_EPT;
 }
 
+static inline int cpu_has_vmx_unrestricted_guest(void)
+{
+       return vmcs_config.cpu_based_2nd_exec_ctrl &
+               SECONDARY_EXEC_UNRESTRICTED_GUEST;
+}
+
 static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
 {
        return flexpriority_enabled &&
@@ -801,8 +813,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
                vmx->rmode.irq.pending = true;
                vmx->rmode.irq.vector = nr;
                vmx->rmode.irq.rip = kvm_rip_read(vcpu);
-               if (nr == BP_VECTOR || nr == OF_VECTOR)
-                       vmx->rmode.irq.rip++;
+               if (kvm_exception_is_soft(nr))
+                       vmx->rmode.irq.rip +=
+                               vmx->vcpu.arch.event_exit_inst_len;
                intr_info |= INTR_TYPE_SOFT_INTR;
                vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
                vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
@@ -940,7 +953,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
        case MSR_EFER:
                return kvm_get_msr_common(vcpu, msr_index, pdata);
 #endif
-       case MSR_IA32_TIME_STAMP_COUNTER:
+       case MSR_IA32_TSC:
                data = guest_read_tsc();
                break;
        case MSR_IA32_SYSENTER_CS:
@@ -1000,7 +1013,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
        case MSR_IA32_SYSENTER_ESP:
                vmcs_writel(GUEST_SYSENTER_ESP, data);
                break;
-       case MSR_IA32_TIME_STAMP_COUNTER:
+       case MSR_IA32_TSC:
                rdtscll(host_tsc);
                guest_write_tsc(data, host_tsc);
                break;
@@ -1046,6 +1059,10 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
        case VCPU_REGS_RIP:
                vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
                break;
+       case VCPU_EXREG_PDPTR:
+               if (enable_ept)
+                       ept_save_pdptrs(vcpu);
+               break;
        default:
                break;
        }
@@ -1203,7 +1220,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
                        SECONDARY_EXEC_WBINVD_EXITING |
                        SECONDARY_EXEC_ENABLE_VPID |
-                       SECONDARY_EXEC_ENABLE_EPT;
+                       SECONDARY_EXEC_ENABLE_EPT |
+                       SECONDARY_EXEC_UNRESTRICTED_GUEST;
                if (adjust_vmx_controls(min2, opt2,
                                        MSR_IA32_VMX_PROCBASED_CTLS2,
                                        &_cpu_based_2nd_exec_control) < 0)
@@ -1333,8 +1351,13 @@ static __init int hardware_setup(void)
        if (!cpu_has_vmx_vpid())
                enable_vpid = 0;
 
-       if (!cpu_has_vmx_ept())
+       if (!cpu_has_vmx_ept()) {
                enable_ept = 0;
+               enable_unrestricted_guest = 0;
+       }
+
+       if (!cpu_has_vmx_unrestricted_guest())
+               enable_unrestricted_guest = 0;
 
        if (!cpu_has_vmx_flexpriority())
                flexpriority_enabled = 0;
@@ -1433,6 +1456,9 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
        unsigned long flags;
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+       if (enable_unrestricted_guest)
+               return;
+
        vmx->emulation_required = 1;
        vcpu->arch.rmode.vm86_active = 1;
 
@@ -1545,11 +1571,11 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 
 static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
 {
+       if (!test_bit(VCPU_EXREG_PDPTR,
+                     (unsigned long *)&vcpu->arch.regs_dirty))
+               return;
+
        if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
-               if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
-                       printk(KERN_ERR "EPT: Fail to load pdptrs!\n");
-                       return;
-               }
                vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
                vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
                vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
@@ -1557,6 +1583,21 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
        }
 }
 
+static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
+{
+       if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
+               vcpu->arch.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
+               vcpu->arch.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
+               vcpu->arch.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
+               vcpu->arch.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
+       }
+
+       __set_bit(VCPU_EXREG_PDPTR,
+                 (unsigned long *)&vcpu->arch.regs_avail);
+       __set_bit(VCPU_EXREG_PDPTR,
+                 (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 
 static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
@@ -1571,7 +1612,6 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
                              CPU_BASED_CR3_STORE_EXITING));
                vcpu->arch.cr0 = cr0;
                vmx_set_cr4(vcpu, vcpu->arch.cr4);
-               *hw_cr0 |= X86_CR0_PE | X86_CR0_PG;
                *hw_cr0 &= ~X86_CR0_WP;
        } else if (!is_paging(vcpu)) {
                /* From nonpaging to paging */
@@ -1598,8 +1638,13 @@ static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
 
 static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
-       unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) |
-                               KVM_VM_CR0_ALWAYS_ON;
+       unsigned long hw_cr0;
+
+       if (enable_unrestricted_guest)
+               hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST)
+                       | KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
+       else
+               hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
 
        vmx_fpu_deactivate(vcpu);
 
@@ -1650,8 +1695,6 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
        if (enable_ept) {
                eptp = construct_eptp(cr3);
                vmcs_write64(EPT_POINTER, eptp);
-               ept_sync_context(eptp);
-               ept_load_pdptrs(vcpu);
                guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
                        VMX_EPT_IDENTITY_PAGETABLE_ADDR;
        }
@@ -1766,6 +1809,21 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
                ar = 0xf3;
        } else
                ar = vmx_segment_access_rights(var);
+
+       /*
+        *   Fix the "Accessed" bit in AR field of segment registers for older
+        * qemu binaries.
+        *   IA32 arch specifies that at the time of processor reset the
+        * "Accessed" bit in the AR field of segment registers is 1. And qemu
+        * is setting it to 0 in the userland code. This causes invalid guest
+        * state vmexit when "unrestricted guest" mode is turned on.
+        *    A fix for this setup issue in cpu_reset is being pushed to the
+        * qemu tree. Newer qemu binaries with that qemu fix will not need this
+        * kvm hack.
+        */
+       if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
+               ar |= 0x1; /* Accessed */
+
        vmcs_write32(sf->ar_bytes, ar);
 }
 
@@ -2062,11 +2120,19 @@ out:
 static void seg_setup(int seg)
 {
        struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+       unsigned int ar;
 
        vmcs_write16(sf->selector, 0);
        vmcs_writel(sf->base, 0);
        vmcs_write32(sf->limit, 0xffff);
-       vmcs_write32(sf->ar_bytes, 0xf3);
+       if (enable_unrestricted_guest) {
+               ar = 0x93;
+               if (seg == VCPU_SREG_CS)
+                       ar |= 0x08; /* code segment */
+       } else
+               ar = 0xf3;
+
+       vmcs_write32(sf->ar_bytes, ar);
 }
 
 static int alloc_apic_access_page(struct kvm *kvm)
@@ -2209,6 +2275,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
                        exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
                if (!enable_ept)
                        exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+               if (!enable_unrestricted_guest)
+                       exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
                vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
        }
 
@@ -2468,6 +2536,9 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
                vmx->rmode.irq.pending = true;
                vmx->rmode.irq.vector = irq;
                vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+               if (vcpu->arch.interrupt.soft)
+                       vmx->rmode.irq.rip +=
+                               vmx->vcpu.arch.event_exit_inst_len;
                vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
                             irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK);
                vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
@@ -3130,8 +3201,8 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
                        (long unsigned int)exit_qualification);
                kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-               kvm_run->hw.hardware_exit_reason = 0;
-               return -ENOTSUPP;
+               kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION;
+               return 0;
        }
 
        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
@@ -3247,10 +3318,8 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
        /* Access CR3 don't cause VMExit in paging mode, so we need
         * to sync with guest real CR3. */
-       if (enable_ept && is_paging(vcpu)) {
+       if (enable_ept && is_paging(vcpu))
                vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
-               ept_load_pdptrs(vcpu);
-       }
 
        if (unlikely(vmx->fail)) {
                kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -3434,6 +3503,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+       if (enable_ept && is_paging(vcpu)) {
+               vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
+               ept_load_pdptrs(vcpu);
+       }
        /* Record the guest's net vcpu time for enforced NMI injections. */
        if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
                vmx->entry_time = ktime_get();
@@ -3449,6 +3522,14 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
                vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
+       /* When single-stepping over STI and MOV SS, we must clear the
+        * corresponding interruptibility bits in the guest state. Otherwise
+        * vmentry fails as it then expects bit 14 (BS) in pending debug
+        * exceptions being set, but that's not correct for the guest debugging
+        * case. */
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+               vmx_set_interrupt_shadow(vcpu, 0);
+
        /*
         * Loading guest fpu may have cleared host cr0.ts
         */
@@ -3547,7 +3628,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 #endif
              );
 
-       vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
+       vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
+                                 | (1 << VCPU_EXREG_PDPTR));
        vcpu->arch.regs_dirty = 0;
 
        get_debugreg(vcpu->arch.dr6, 6);