Merge remote-tracking branch 'kvm/linux-next'
author    Thierry Reding <treding@nvidia.com>
          Thu, 24 Oct 2013 12:58:35 +0000 (14:58 +0200)
committer Thierry Reding <treding@nvidia.com>
          Thu, 24 Oct 2013 12:58:35 +0000 (14:58 +0200)
Conflicts:
arch/arm/kvm/reset.c

arch/arm/kvm/reset.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/x86/include/uapi/asm/msr-index.h
arch/x86/kvm/vmx.c
virt/kvm/kvm_main.c

diff --combined arch/arm/kvm/reset.c
index c02ba4af599f417113fdb2c260270ae7162575e6,d9bbd834f188031ecab5fff7af21a65fd2b9e8a6..d153e64d125505c9a8623521053fbe507db83a2f
  #include <kvm/arm_arch_timer.h>
  
  /******************************************************************************
-  * Cortex-A15 Reset Values
+  * Cortex-A15 and Cortex-A7 Reset Values
   */
  
- static const int a15_max_cpu_idx = 3;
+ static const int cortexa_max_cpu_idx = 3;
  
- static struct kvm_regs a15_regs_reset = {
+ static struct kvm_regs cortexa_regs_reset = {
        .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
  };
  
- static const struct kvm_irq_level a15_vtimer_irq = {
+ static const struct kvm_irq_level cortexa_vtimer_irq = {
        { .irq = 27 },
        .level = 1,
  };
   */
  int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
  {
 -      struct kvm_regs *cpu_reset;
 +      struct kvm_regs *reset_regs;
        const struct kvm_irq_level *cpu_vtimer_irq;
  
        switch (vcpu->arch.target) {
+       case KVM_ARM_TARGET_CORTEX_A7:
        case KVM_ARM_TARGET_CORTEX_A15:
-               if (vcpu->vcpu_id > a15_max_cpu_idx)
+               if (vcpu->vcpu_id > cortexa_max_cpu_idx)
                        return -EINVAL;
-               reset_regs = &a15_regs_reset;
 -              cpu_reset = &cortexa_regs_reset;
++              reset_regs = &cortexa_regs_reset;
                vcpu->arch.midr = read_cpuid_id();
-               cpu_vtimer_irq = &a15_vtimer_irq;
+               cpu_vtimer_irq = &cortexa_vtimer_irq;
                break;
        default:
                return -ENODEV;
        }
  
        /* Reset core registers */
 -      memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs));
 +      memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs));
  
        /* Reset CP15 registers */
        kvm_reset_coprocs(vcpu);
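Note on the resolution above: the conflict came from renaming the Cortex-A15-only reset data (a15_*) to cortexa_* so the same reset values and vtimer IRQ can serve Cortex-A7 guests as well. Reconstructed from the combined diff, the resolved switch in kvm_reset_vcpu() reads roughly as follows (a sketch of the merged result, not the verbatim file):

       switch (vcpu->arch.target) {
       case KVM_ARM_TARGET_CORTEX_A7:
       case KVM_ARM_TARGET_CORTEX_A15:
               if (vcpu->vcpu_id > cortexa_max_cpu_idx)
                       return -EINVAL;
               reset_regs = &cortexa_regs_reset;       /* SVC mode, A/I/F masked */
               vcpu->arch.midr = read_cpuid_id();
               cpu_vtimer_irq = &cortexa_vtimer_irq;   /* virtual timer PPI 27 */
               break;
       default:
               return -ENODEV;
       }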
diff --combined arch/s390/kvm/interrupt.c
index 7f1f7ac5cf7f8a2c3f3966d4fe96fa23af90ea04,e7323cd9f1098dc57cec3f21382330cdbf6a84f8..5f79d2d79ca76f34648677bb3514802458882b81
@@@ -385,7 -385,7 +385,7 @@@ static int kvm_cpu_has_interrupt(struc
        }
  
        if ((!rc) && (vcpu->arch.sie_block->ckc <
 -              get_tod_clock() + vcpu->arch.sie_block->epoch)) {
 +              get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) {
                if ((!psw_extint_disabled(vcpu)) &&
                        (vcpu->arch.sie_block->gcr[0] & 0x800ul))
                        rc = 1;
@@@ -425,7 -425,7 +425,7 @@@ int kvm_s390_handle_wait(struct kvm_vcp
                goto no_timer;
        }
  
 -      now = get_tod_clock() + vcpu->arch.sie_block->epoch;
 +      now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
        if (vcpu->arch.sie_block->ckc < now) {
                __unset_cpu_idle(vcpu);
                return 0;
        hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
        VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
  no_timer:
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        spin_lock(&vcpu->arch.local_int.float_int->lock);
        spin_lock_bh(&vcpu->arch.local_int.lock);
        add_wait_queue(&vcpu->wq, &wait);
        remove_wait_queue(&vcpu->wq, &wait);
        spin_unlock_bh(&vcpu->arch.local_int.lock);
        spin_unlock(&vcpu->arch.local_int.float_int->lock);
+       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
        hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
        return 0;
  }
@@@ -515,7 -518,7 +518,7 @@@ void kvm_s390_deliver_pending_interrupt
        }
  
        if ((vcpu->arch.sie_block->ckc <
 -              get_tod_clock() + vcpu->arch.sie_block->epoch))
 +              get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
                __try_deliver_ckc_interrupt(vcpu);
  
        if (atomic_read(&fi->active)) {
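Two independent changes meet in this file: the clock-comparator checks switch from get_tod_clock() to the cheaper get_tod_clock_fast(), and kvm_s390_handle_wait() now drops the kvm->srcu read lock around its blocking wait so memslot updates are not held up while a vcpu sleeps. A minimal sketch of that second pattern, using only the calls visible in the hunk above:

       /* release the memslot SRCU read side before blocking ... */
       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
       /* ... sleep until an interrupt or the clock comparator fires ... */
       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
       /* ... then continue with a valid memslot view again */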
diff --combined arch/s390/kvm/kvm-s390.c
index ed8064cb5c4921424d5981b890e6fd9b07f9ed02,1e4e7b97337a8a72b8820a77a3f99e6232618bcc..2d67b3bbf1906d4a0f33d72c5e31344c94f2feab
@@@ -343,11 -343,10 +343,11 @@@ void kvm_arch_vcpu_uninit(struct kvm_vc
  
  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
  {
 -      save_fp_regs(&vcpu->arch.host_fpregs);
 +      save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
 +      save_fp_regs(vcpu->arch.host_fpregs.fprs);
        save_access_regs(vcpu->arch.host_acrs);
 -      vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
 -      restore_fp_regs(&vcpu->arch.guest_fpregs);
 +      restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
 +      restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
        restore_access_regs(vcpu->run->s.regs.acrs);
        gmap_enable(vcpu->arch.gmap);
        atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@@ -357,11 -356,9 +357,11 @@@ void kvm_arch_vcpu_put(struct kvm_vcpu 
  {
        atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        gmap_disable(vcpu->arch.gmap);
 -      save_fp_regs(&vcpu->arch.guest_fpregs);
 +      save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
 +      save_fp_regs(vcpu->arch.guest_fpregs.fprs);
        save_access_regs(vcpu->run->s.regs.acrs);
 -      restore_fp_regs(&vcpu->arch.host_fpregs);
 +      restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
 +      restore_fp_regs(vcpu->arch.host_fpregs.fprs);
        restore_access_regs(vcpu->arch.host_acrs);
  }
  
@@@ -621,12 -618,9 +621,12 @@@ int kvm_arch_vcpu_ioctl_get_sregs(struc
  
  int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
  {
 +      if (test_fp_ctl(fpu->fpc))
 +              return -EINVAL;
        memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
 -      vcpu->arch.guest_fpregs.fpc = fpu->fpc & FPC_VALID_MASK;
 -      restore_fp_regs(&vcpu->arch.guest_fpregs);
 +      vcpu->arch.guest_fpregs.fpc = fpu->fpc;
 +      restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
 +      restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
        return 0;
  }
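The s390 FPU handling in this file splits the old save_fp_regs()/restore_fp_regs(&fpregs) calls into separate control-register and data-register helpers, and kvm_arch_vcpu_ioctl_set_fpu() now rejects an invalid FPC up front instead of silently masking it with FPC_VALID_MASK. The resulting set-fpu path, condensed from the hunk above:

       if (test_fp_ctl(fpu->fpc))                      /* validate the FP control word */
               return -EINVAL;
       memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
       vcpu->arch.guest_fpregs.fpc = fpu->fpc;
       restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);   /* load the FPC */
       restore_fp_regs(vcpu->arch.guest_fpregs.fprs);  /* load the FP data registers */
       return 0;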
  
@@@ -695,9 -689,9 +695,9 @@@ static int kvm_s390_handle_requests(str
        return 0;
  }
  
- static int __vcpu_run(struct kvm_vcpu *vcpu)
+ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
  {
-       int rc;
+       int rc, cpuflags;
  
        memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
  
                return rc;
  
        vcpu->arch.sie_block->icptcode = 0;
-       VCPU_EVENT(vcpu, 6, "entering sie flags %x",
-                  atomic_read(&vcpu->arch.sie_block->cpuflags));
-       trace_kvm_s390_sie_enter(vcpu,
-                                atomic_read(&vcpu->arch.sie_block->cpuflags));
+       cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
+       VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
+       trace_kvm_s390_sie_enter(vcpu, cpuflags);
  
-       /*
-        * As PF_VCPU will be used in fault handler, between guest_enter
-        * and guest_exit should be no uaccess.
-        */
-       preempt_disable();
-       kvm_guest_enter();
-       preempt_enable();
-       rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
-       kvm_guest_exit();
+       return 0;
+ }
+
+ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
+ {
+       int rc;
  
        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
                   vcpu->arch.sie_block->icptcode);
        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
  
-       if (rc > 0)
+       if (exit_reason >= 0) {
                rc = 0;
-       if (rc < 0) {
+       } else {
                if (kvm_is_ucontrol(vcpu->kvm)) {
                        rc = SIE_INTERCEPT_UCONTROL;
                } else {
        }
  
        memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
+       if (rc == 0) {
+               if (kvm_is_ucontrol(vcpu->kvm))
+                       rc = -EOPNOTSUPP;
+               else
+                       rc = kvm_handle_sie_intercept(vcpu);
+       }
+       return rc;
+ }
+
+ static int __vcpu_run(struct kvm_vcpu *vcpu)
+ {
+       int rc, exit_reason;
+       /*
+        * We try to hold kvm->srcu during most of vcpu_run (except when run-
+        * ning the guest), so that memslots (and other stuff) are protected
+        */
+       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       do {
+               rc = vcpu_pre_run(vcpu);
+               if (rc)
+                       break;
+               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+               /*
+                * As PF_VCPU will be used in fault handler, between
+                * guest_enter and guest_exit should be no uaccess.
+                */
+               preempt_disable();
+               kvm_guest_enter();
+               preempt_enable();
+               exit_reason = sie64a(vcpu->arch.sie_block,
+                                    vcpu->run->s.regs.gprs);
+               kvm_guest_exit();
+               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+               rc = vcpu_post_run(vcpu, exit_reason);
+       } while (!signal_pending(current) && !rc);
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        return rc;
  }
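The old monolithic __vcpu_run() is split into vcpu_pre_run() and vcpu_post_run(), and the retry loop that used to live in kvm_arch_vcpu_ioctl_run() moves in here, with kvm->srcu held everywhere except across the actual SIE entry. Condensed from the hunk above (the guest_enter/guest_exit and preemption handling are omitted for brevity):

       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
       do {
               rc = vcpu_pre_run(vcpu);
               if (rc)
                       break;
               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
               exit_reason = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
               rc = vcpu_post_run(vcpu, exit_reason);
       } while (!signal_pending(current) && !rc);
       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);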
  
@@@ -755,7 -788,6 +794,6 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
        int rc;
        sigset_t sigsaved;
  
- rerun_vcpu:
        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
  
        }
  
        might_fault();
-       do {
-               rc = __vcpu_run(vcpu);
-               if (rc)
-                       break;
-               if (kvm_is_ucontrol(vcpu->kvm))
-                       rc = -EOPNOTSUPP;
-               else
-                       rc = kvm_handle_sie_intercept(vcpu);
-       } while (!signal_pending(current) && !rc);
-       if (rc == SIE_INTERCEPT_RERUNVCPU)
-               goto rerun_vcpu;
+       rc = __vcpu_run(vcpu);
  
        if (signal_pending(current) && !rc) {
                kvm_run->exit_reason = KVM_EXIT_INTR;
@@@ -882,8 -902,7 +908,8 @@@ int kvm_s390_vcpu_store_status(struct k
         * copying in vcpu load/put. Lets update our copies before we save
         * it into the save area
         */
 -      save_fp_regs(&vcpu->arch.guest_fpregs);
 +      save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
 +      save_fp_regs(vcpu->arch.guest_fpregs.fprs);
        save_access_regs(vcpu->run->s.regs.acrs);
  
        if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
@@@ -958,6 -977,7 +984,7 @@@ long kvm_arch_vcpu_ioctl(struct file *f
  {
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;
+       int idx;
        long r;
  
        switch (ioctl) {
                break;
        }
        case KVM_S390_STORE_STATUS:
+               idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = kvm_s390_vcpu_store_status(vcpu, arg);
+               srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
        case KVM_S390_SET_INITIAL_PSW: {
                psw_t psw;
diff --combined arch/x86/include/uapi/asm/msr-index.h
index 940ed3fd889a743732891945b191f34cddc8e217,b93e09a0fa21c34ee20a5cf3ddd8319efa07f40c..37813b5ddc37472dba6c64b8ff3f2508dc085de0
  #define MSR_PP1_ENERGY_STATUS         0x00000641
  #define MSR_PP1_POLICY                        0x00000642
  
 +#define MSR_CORE_C1_RES                       0x00000660
 +
  #define MSR_AMD64_MC0_MASK            0xc0010044
  
  #define MSR_IA32_MCx_CTL(x)           (MSR_IA32_MC0_CTL + 4*(x))
  
  /* MSR_IA32_VMX_MISC bits */
  #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+ #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE   0x1F
  /* AMD-V MSRs */
  
  #define MSR_VM_CR                       0xc0010114
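MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE masks bits 4:0 of IA32_VMX_MISC, which report how the VMX preemption timer relates to the TSC: the timer counts down once every 2^scale TSC cycles. A minimal usage sketch, matching how the vmx.c hunks below consume it (tsc_delta is a stand-in for an elapsed TSC value, not a name from the patch):

       u64 misc  = native_read_msr(MSR_IA32_VMX_MISC);
       u32 scale = misc & MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
       u32 timer_ticks = tsc_delta >> scale;   /* TSC cycles -> preemption-timer ticks */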
diff --combined arch/x86/kvm/vmx.c
index 2b2fce1b200900b1af42865f946d5faa25fdc56a,0156560c68a83c758a19e076e4ac730ccf1de8c4..06fd7629068ac6ddd3adde76dbec5aff7d39bf29
@@@ -1898,16 -1898,12 +1898,12 @@@ static void skip_emulated_instruction(s
  /*
   * KVM wants to inject page-faults which it got to the guest. This function
   * checks whether in a nested guest, we need to inject them to L1 or L2.
-  * This function assumes it is called with the exit reason in vmcs02 being
-  * a #PF exception (this is the only case in which KVM injects a #PF when L2
-  * is running).
   */
- static int nested_pf_handled(struct kvm_vcpu *vcpu)
+ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
  {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
  
-       /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
-       if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
+       if (!(vmcs12->exception_bitmap & (1u << nr)))
                return 0;
  
        nested_vmx_vmexit(vcpu);
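The rename from nested_pf_handled() to nested_vmx_check_exception() generalizes the check from page faults only to any exception vector: if L1's exception bitmap has the bit for vector nr set, the exception is reflected to L1 instead of being injected into L2. The tail of the function is not shown above, but in sketch form it behaves as:

       if (!(get_vmcs12(vcpu)->exception_bitmap & (1u << nr)))
               return 0;               /* L1 does not intercept it: inject into L2 */
       nested_vmx_vmexit(vcpu);        /* reflect the exception to L1 */
       return 1;                       /* caller skips the L2 injection */

Note also that vmx_queue_exception() only consults this for newly raised exceptions (!reinject); events being re-injected after an interrupted delivery are not bounced back to L1.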
@@@ -1921,8 -1917,8 +1917,8 @@@ static void vmx_queue_exception(struct 
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 intr_info = nr | INTR_INFO_VALID_MASK;
  
-       if (nr == PF_VECTOR && is_guest_mode(vcpu) &&
-           !vmx->nested.nested_run_pending && nested_pf_handled(vcpu))
+       if (!reinject && is_guest_mode(vcpu) &&
+           nested_vmx_check_exception(vcpu, nr))
                return;
  
        if (has_error_code) {
@@@ -2204,9 -2200,15 +2200,15 @@@ static __init void nested_vmx_setup_ctl
  #ifdef CONFIG_X86_64
                VM_EXIT_HOST_ADDR_SPACE_SIZE |
  #endif
-               VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+               VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+               VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+       if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+           !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
+               nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+               nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+       }
        nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
-                                     VM_EXIT_LOAD_IA32_EFER);
+               VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
  
        /* entry controls */
        rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
        nested_vmx_secondary_ctls_low = 0;
        nested_vmx_secondary_ctls_high &=
                SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+               SECONDARY_EXEC_UNRESTRICTED_GUEST |
                SECONDARY_EXEC_WBINVD_EXITING;
  
        if (enable_ept) {
@@@ -3255,29 -3258,25 +3258,29 @@@ static void vmx_decache_cr4_guest_bits(
  
  static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
  {
 +      struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
 +
        if (!test_bit(VCPU_EXREG_PDPTR,
                      (unsigned long *)&vcpu->arch.regs_dirty))
                return;
  
        if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
 -              vmcs_write64(GUEST_PDPTR0, vcpu->arch.mmu.pdptrs[0]);
 -              vmcs_write64(GUEST_PDPTR1, vcpu->arch.mmu.pdptrs[1]);
 -              vmcs_write64(GUEST_PDPTR2, vcpu->arch.mmu.pdptrs[2]);
 -              vmcs_write64(GUEST_PDPTR3, vcpu->arch.mmu.pdptrs[3]);
 +              vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
 +              vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
 +              vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
 +              vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
        }
  }
  
  static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
  {
 +      struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
 +
        if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
 -              vcpu->arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
 -              vcpu->arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
 -              vcpu->arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
 -              vcpu->arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
 +              mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
 +              mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
 +              mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
 +              mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
        }
  
        __set_bit(VCPU_EXREG_PDPTR,
@@@ -3380,8 -3379,10 +3383,10 @@@ static void vmx_set_cr3(struct kvm_vcp
        if (enable_ept) {
                eptp = construct_eptp(cr3);
                vmcs_write64(EPT_POINTER, eptp);
-               guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) :
-                       vcpu->kvm->arch.ept_identity_map_addr;
+               if (is_paging(vcpu) || is_guest_mode(vcpu))
+                       guest_cr3 = kvm_read_cr3(vcpu);
+               else
+                       guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr;
                ept_load_pdptrs(vcpu);
        }
  
@@@ -4879,6 -4880,17 +4884,17 @@@ vmx_patch_hypercall(struct kvm_vcpu *vc
        hypercall[2] = 0xc1;
  }
  
+ static bool nested_cr0_valid(struct vmcs12 *vmcs12, unsigned long val)
+ {
+       unsigned long always_on = VMXON_CR0_ALWAYSON;
+       if (nested_vmx_secondary_ctls_high &
+               SECONDARY_EXEC_UNRESTRICTED_GUEST &&
+           nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
+               always_on &= ~(X86_CR0_PE | X86_CR0_PG);
+       return (val & always_on) == always_on;
+ }
+
  /* called to set cr0 as appropriate for a mov-to-cr0 exit. */
  static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
  {
                val = (val & ~vmcs12->cr0_guest_host_mask) |
                        (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
  
-               /* TODO: will have to take unrestricted guest mode into
-                * account */
-               if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)
+               if (!nested_cr0_valid(vmcs12, val))
                        return 1;
  
                if (kvm_set_cr0(vcpu, val))
@@@ -5349,9 -5359,7 +5363,9 @@@ static int handle_ept_violation(struct 
         * There are errata that may cause this bit to not be set:
         * AAK134, BY25.
         */
 -      if (exit_qualification & INTR_INFO_UNBLOCK_NMI)
 +      if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
 +                      cpu_has_virtual_nmis() &&
 +                      (exit_qualification & INTR_INFO_UNBLOCK_NMI))
                vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
  
        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
@@@ -6722,6 -6730,27 +6736,27 @@@ static void vmx_get_exit_info(struct kv
        *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
  }
  
+ static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
+ {
+       u64 delta_tsc_l1;
+       u32 preempt_val_l1, preempt_val_l2, preempt_scale;
+       if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
+                       PIN_BASED_VMX_PREEMPTION_TIMER))
+               return;
+       preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
+                       MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
+       preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+       delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
+               - vcpu->arch.last_guest_tsc;
+       preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
+       if (preempt_val_l2 <= preempt_val_l1)
+               preempt_val_l2 = 0;
+       else
+               preempt_val_l2 -= preempt_val_l1;
+       vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
+ }
+
  /*
   * The guest has exited.  See if we can fix it or if we need userspace
   * assistance.
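nested_adjust_preemption_timer() compensates L1's preemption timer for the time L0 (KVM) spent handling an exit: the TSC delta since the last guest entry is converted to timer ticks with the scale from IA32_VMX_MISC and subtracted from the value saved at the previous exit, clamping at zero. A self-contained restatement of that arithmetic (hypothetical helper using kernel-style types, not part of the patch):

       static u32 remaining_preemption_timer(u32 saved_l2_val, u64 tsc_in_l0, u32 scale)
       {
               u64 elapsed_ticks = tsc_in_l0 >> scale; /* timer runs at TSC rate / 2^scale */

               if (elapsed_ticks >= saved_l2_val)
                       return 0;                       /* timer would already have fired */
               return saved_l2_val - (u32)elapsed_ticks;
       }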
@@@ -6736,20 -6765,6 +6771,6 @@@ static int vmx_handle_exit(struct kvm_v
        if (vmx->emulation_required)
                return handle_invalid_guest_state(vcpu);
  
-       /*
-        * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
-        * we did not inject a still-pending event to L1 now because of
-        * nested_run_pending, we need to re-enable this bit.
-        */
-       if (vmx->nested.nested_run_pending)
-               kvm_make_request(KVM_REQ_EVENT, vcpu);
-       if (!is_guest_mode(vcpu) && (exit_reason == EXIT_REASON_VMLAUNCH ||
-           exit_reason == EXIT_REASON_VMRESUME))
-               vmx->nested.nested_run_pending = 1;
-       else
-               vmx->nested.nested_run_pending = 0;
        if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
                nested_vmx_vmexit(vcpu);
                return 1;
@@@ -7061,9 -7076,9 +7082,9 @@@ static void __vmx_complete_interrupts(s
        case INTR_TYPE_HARD_EXCEPTION:
                if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
                        u32 err = vmcs_read32(error_code_field);
-                       kvm_queue_exception_e(vcpu, vector, err);
+                       kvm_requeue_exception_e(vcpu, vector, err);
                } else
-                       kvm_queue_exception(vcpu, vector);
+                       kvm_requeue_exception(vcpu, vector);
                break;
        case INTR_TYPE_SOFT_INTR:
                vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
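Switching __vmx_complete_interrupts() to kvm_requeue_exception()/kvm_requeue_exception_e() marks events recovered from the IDT-vectoring field as re-injections, assuming the x86 helpers set vcpu->arch.exception.reinject as their names suggest. That pairs with the vmcs12_save_pending_event() hunk further down, which now records only such interrupted deliveries for L1, so freshly raised exceptions are no longer mistaken for them. Roughly (a simplified sketch, not the full bookkeeping):

       /* kvm_queue_exception():   newly raised, may be reflected to L1, .reinject = false
        * kvm_requeue_exception(): delivery was interrupted by the exit,  .reinject = true */
       if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject)
               idt_vectoring = nr | VECTORING_INFO_VALID_MASK;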
@@@ -7146,6 -7161,8 +7167,8 @@@ static void __noclone vmx_vcpu_run(stru
        atomic_switch_perf_msrs(vmx);
        debugctlmsr = get_debugctlmsr();
  
+       if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
+               nested_adjust_preemption_timer(vcpu);
        vmx->__launched = vmx->loaded_vmcs->launched;
        asm(
                /* Store host registers */
        vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
        trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
  
+       /*
+        * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
+        * we did not inject a still-pending event to L1 now because of
+        * nested_run_pending, we need to re-enable this bit.
+        */
+       if (vmx->nested.nested_run_pending)
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
+       vmx->nested.nested_run_pending = 0;
        vmx_complete_atomic_exit(vmx);
        vmx_recover_nmi_blocking(vmx);
        vmx_complete_interrupts(vmx);
@@@ -7501,9 -7528,9 +7534,9 @@@ static unsigned long nested_ept_get_cr3
        return get_vmcs12(vcpu)->ept_pointer;
  }
  
- static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
+ static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
  {
-       int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
+       kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
                        nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT);
  
        vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
        vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
  
        vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
-       return r;
  }
  
  static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
        vcpu->arch.walk_mmu = &vcpu->arch.mmu;
  }
  
+ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
+               struct x86_exception *fault)
+ {
+       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+       WARN_ON(!is_guest_mode(vcpu));
+       /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
+       if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
+               nested_vmx_vmexit(vcpu);
+       else
+               kvm_inject_page_fault(vcpu, fault);
+ }
+
  /*
   * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
   * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@@ -7533,6 -7572,7 +7578,7 @@@ static void prepare_vmcs02(struct kvm_v
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 exec_control;
+       u32 exit_control;
  
        vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
        vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
         * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
         * bits are further modified by vmx_set_efer() below.
         */
-       vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+       exit_control = vmcs_config.vmexit_ctrl;
+       if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
+               exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+       vmcs_write32(VM_EXIT_CONTROLS, exit_control);
  
        /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
         * emulated by vmx_set_efer(), below.
        kvm_set_cr3(vcpu, vmcs12->guest_cr3);
        kvm_mmu_reset_context(vcpu);
  
+       if (!enable_ept)
+               vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
        /*
         * L1 may access the L2's PDPTR, so save them to construct vmcs12
         */
                vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
                vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
                vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
 -              __clear_bit(VCPU_EXREG_PDPTR,
 -                              (unsigned long *)&vcpu->arch.regs_avail);
 -              __clear_bit(VCPU_EXREG_PDPTR,
 -                              (unsigned long *)&vcpu->arch.regs_dirty);
        }
  
        kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
@@@ -7876,7 -7926,7 +7928,7 @@@ static int nested_vmx_run(struct kvm_vc
                return 1;
        }
  
-       if (((vmcs12->guest_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
+       if (!nested_cr0_valid(vmcs12, vmcs12->guest_cr0) ||
            ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
                nested_vmx_entry_failure(vcpu, vmcs12,
                        EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
  
        enter_guest_mode(vcpu);
  
+       vmx->nested.nested_run_pending = 1;
        vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
  
        cpu = get_cpu();
@@@ -8005,7 -8057,7 +8059,7 @@@ static void vmcs12_save_pending_event(s
        u32 idt_vectoring;
        unsigned int nr;
  
-       if (vcpu->arch.exception.pending) {
+       if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
                nr = vcpu->arch.exception.nr;
                idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
  
@@@ -8105,6 -8157,11 +8159,11 @@@ static void prepare_vmcs12(struct kvm_v
        vmcs12->guest_pending_dbg_exceptions =
                vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
  
+       if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
+           (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+               vmcs12->vmx_preemption_timer_value =
+                       vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
        /*
         * In some cases (usually, nested EPT), L2 is allowed to change its
         * own CR3 without exiting. If it has changed it, we must keep it.
        vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
        if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
                vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
+       if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
+               vmcs12->guest_ia32_efer = vcpu->arch.efer;
        vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
        vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
        vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
@@@ -8201,7 -8260,7 +8262,7 @@@ static void load_vmcs12_host_state(stru
         * fpu_active (which may have changed).
         * Note that vmx_set_cr0 refers to efer set above.
         */
-       kvm_set_cr0(vcpu, vmcs12->host_cr0);
+       vmx_set_cr0(vcpu, vmcs12->host_cr0);
        /*
         * If we did fpu_activate()/fpu_deactivate() during L2's run, we need
         * to apply the same changes to L1's vmcs. We just set cr0 correctly,
        kvm_set_cr3(vcpu, vmcs12->host_cr3);
        kvm_mmu_reset_context(vcpu);
  
+       if (!enable_ept)
+               vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
        if (enable_vpid) {
                /*
                 * Trivially support vpid by letting L2s share their parent
diff --combined virt/kvm/kvm_main.c
index a9dd682cf5e3f5117de017156396337a8352914f,d469114aff097dbdc791173c6ec997a48970249c..0d20c320a33daa5b20710e2fa0f7c3ab18390d27
@@@ -70,7 -70,8 +70,8 @@@ MODULE_LICENSE("GPL")
   *            kvm->lock --> kvm->slots_lock --> kvm->irq_lock
   */
  
- DEFINE_RAW_SPINLOCK(kvm_lock);
+ DEFINE_SPINLOCK(kvm_lock);
+ static DEFINE_RAW_SPINLOCK(kvm_count_lock);
  LIST_HEAD(vm_list);
  
  static cpumask_var_t cpus_hardware_enabled;
@@@ -490,9 -491,9 +491,9 @@@ static struct kvm *kvm_create_vm(unsign
        if (r)
                goto out_err;
  
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_add(&kvm->vm_list, &vm_list);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
  
        return kvm;
  
@@@ -581,9 -582,9 +582,9 @@@ static void kvm_destroy_vm(struct kvm *
        struct mm_struct *mm = kvm->mm;
  
        kvm_arch_sync_events(kvm);
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_del(&kvm->vm_list);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        kvm_free_irq_routing(kvm);
        for (i = 0; i < KVM_NR_BUSES; i++)
                kvm_io_bus_destroy(kvm->buses[i]);
@@@ -1064,12 -1065,10 +1065,12 @@@ EXPORT_SYMBOL_GPL(gfn_to_hva)
  unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
  {
        struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
 -      if (writable)
 +      unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
 +
 +      if (!kvm_is_error_hva(hva) && writable)
                *writable = !memslot_is_readonly(slot);
  
 -      return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
 +      return hva;
  }
  
  static int kvm_read_hva(void *data, void __user *hva, int len)
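gfn_to_hva_prot() used to look up the memslot twice and wrote *writable even when the gfn had no slot at all; the hunk above computes the hva once and fills *writable only for a valid mapping. Callers are expected to keep checking the result as usual, e.g. (a sketch, not code from this patch):

       bool writable;
       unsigned long hva = gfn_to_hva_prot(kvm, gfn, &writable);

       if (kvm_is_error_hva(hva))
               return -EFAULT;         /* no memslot backs this gfn */
       /* hva is valid here and 'writable' reflects the slot's read-only flag */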
@@@ -2683,11 -2682,12 +2684,12 @@@ static void hardware_enable_nolock(voi
        }
  }
  
- static void hardware_enable(void *junk)
+ static void hardware_enable(void)
  {
-       raw_spin_lock(&kvm_lock);
-       hardware_enable_nolock(junk);
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
+       if (kvm_usage_count)
+               hardware_enable_nolock(NULL);
+       raw_spin_unlock(&kvm_count_lock);
  }
  
  static void hardware_disable_nolock(void *junk)
        kvm_arch_hardware_disable(NULL);
  }
  
- static void hardware_disable(void *junk)
+ static void hardware_disable(void)
  {
-       raw_spin_lock(&kvm_lock);
-       hardware_disable_nolock(junk);
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
+       if (kvm_usage_count)
+               hardware_disable_nolock(NULL);
+       raw_spin_unlock(&kvm_count_lock);
  }
  
  static void hardware_disable_all_nolock(void)
  
  static void hardware_disable_all(void)
  {
-       raw_spin_lock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
        hardware_disable_all_nolock();
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_unlock(&kvm_count_lock);
  }
  
  static int hardware_enable_all(void)
  {
        int r = 0;
  
-       raw_spin_lock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
  
        kvm_usage_count++;
        if (kvm_usage_count == 1) {
                }
        }
  
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_unlock(&kvm_count_lock);
  
        return r;
  }
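Throughout this file the old raw kvm_lock is split in two: kvm_lock becomes an ordinary spinlock guarding vm_list and the debugfs stat walkers, while the new raw kvm_count_lock protects kvm_usage_count and the hardware enable/disable paths that can run from CPU-hotplug context. That is also why the early '!kvm_usage_count' bail-out disappears from kvm_cpu_hotplug() below: the check now sits under the lock, as in the enable path taken from the diff above:

       static void hardware_enable(void)
       {
               raw_spin_lock(&kvm_count_lock);
               if (kvm_usage_count)                    /* only if at least one VM exists */
                       hardware_enable_nolock(NULL);
               raw_spin_unlock(&kvm_count_lock);
       }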
@@@ -2750,20 -2751,17 +2753,17 @@@ static int kvm_cpu_hotplug(struct notif
  {
        int cpu = (long)v;
  
-       if (!kvm_usage_count)
-               return NOTIFY_OK;
        val &= ~CPU_TASKS_FROZEN;
        switch (val) {
        case CPU_DYING:
                printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
                       cpu);
-               hardware_disable(NULL);
+               hardware_disable();
                break;
        case CPU_STARTING:
                printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
                       cpu);
-               hardware_enable(NULL);
+               hardware_enable();
                break;
        }
        return NOTIFY_OK;
@@@ -3056,10 -3054,10 +3056,10 @@@ static int vm_stat_get(void *_offset, u
        struct kvm *kvm;
  
        *val = 0;
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
                *val += *(u32 *)((void *)kvm + offset);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        return 0;
  }
  
@@@ -3073,12 -3071,12 +3073,12 @@@ static int vcpu_stat_get(void *_offset
        int i;
  
        *val = 0;
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
                kvm_for_each_vcpu(i, vcpu, kvm)
                        *val += *(u32 *)((void *)vcpu + offset);
  
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        return 0;
  }
  
@@@ -3133,7 -3131,7 +3133,7 @@@ static int kvm_suspend(void
  static void kvm_resume(void)
  {
        if (kvm_usage_count) {
-               WARN_ON(raw_spin_is_locked(&kvm_lock));
+               WARN_ON(raw_spin_is_locked(&kvm_count_lock));
                hardware_enable_nolock(NULL);
        }
  }