KVM: x86: Fix potential preemption when getting the current kvmclock timestamp
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ccbd45ecd41a3fa5c850cf924cccbb2723aecc92..3b5fc7e35f6ef5e99cdf0e82d8c1f342cfad28ae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -27,7 +27,6 @@
 #include "kvm_cache_regs.h"
 #include "x86.h"
 #include "cpuid.h"
-#include "assigned-dev.h"
 #include "pmu.h"
 #include "hyperv.h"
 
@@ -1008,6 +1007,8 @@ static u32 emulated_msrs[] = {
        MSR_IA32_MCG_CTL,
        MSR_IA32_MCG_EXT_CTL,
        MSR_IA32_SMBASE,
+       MSR_PLATFORM_INFO,
+       MSR_MISC_FEATURES_ENABLES,
 };
 
 static unsigned num_emulated_msrs;
@@ -1444,10 +1445,10 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
        struct kvm *kvm = vcpu->kvm;
        u64 offset, ns, elapsed;
        unsigned long flags;
-       s64 usdiff;
        bool matched;
        bool already_matched;
        u64 data = msr->data;
+       bool synchronizing = false;
 
        raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
        offset = kvm_compute_tsc_offset(vcpu, data);
@@ -1455,51 +1456,34 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
        elapsed = ns - kvm->arch.last_tsc_nsec;
 
        if (vcpu->arch.virtual_tsc_khz) {
-               int faulted = 0;
-
-               /* n.b - signed multiplication and division required */
-               usdiff = data - kvm->arch.last_tsc_write;
-#ifdef CONFIG_X86_64
-               usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
-#else
-               /* do_div() only does unsigned */
-               asm("1: idivl %[divisor]\n"
-                   "2: xor %%edx, %%edx\n"
-                   "   movl $0, %[faulted]\n"
-                   "3:\n"
-                   ".section .fixup,\"ax\"\n"
-                   "4: movl $1, %[faulted]\n"
-                   "   jmp  3b\n"
-                   ".previous\n"
-
-               _ASM_EXTABLE(1b, 4b)
-
-               : "=A"(usdiff), [faulted] "=r" (faulted)
-               : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));
-
-#endif
-               do_div(elapsed, 1000);
-               usdiff -= elapsed;
-               if (usdiff < 0)
-                       usdiff = -usdiff;
-
-               /* idivl overflow => difference is larger than USEC_PER_SEC */
-               if (faulted)
-                       usdiff = USEC_PER_SEC;
-       } else
-               usdiff = USEC_PER_SEC; /* disable TSC match window below */
+               if (data == 0 && msr->host_initiated) {
+                       /*
+                        * detection of vcpu initialization -- need to sync
+                        * with other vCPUs. This particularly helps to keep
+                        * kvm_clock stable after CPU hotplug
+                        */
+                       synchronizing = true;
+               } else {
+                       u64 tsc_exp = kvm->arch.last_tsc_write +
+                                               nsec_to_cycles(vcpu, elapsed);
+                       u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
+                       /*
+                        * Special case: TSC write with a small delta (1 second)
+                        * of virtual cycle time against real time is
+                        * interpreted as an attempt to synchronize the CPU.
+                        */
+                       synchronizing = data < tsc_exp + tsc_hz &&
+                                       data + tsc_hz > tsc_exp;
+               }
+       }
 
        /*
-        * Special case: TSC write with a small delta (1 second) of virtual
-        * cycle time against real time is interpreted as an attempt to
-        * synchronize the CPU.
-         *
         * For a reliable TSC, we can match TSC offsets, and for an unstable
         * TSC, we add elapsed time in this computation.  We could let the
         * compensation code attempt to catch up if we fall behind, but
         * it's better to try to match offsets from the beginning.
          */
-       if (usdiff < USEC_PER_SEC &&
+       if (synchronizing &&
            vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
                if (!check_tsc_unstable()) {
                        offset = kvm->arch.cur_tsc_offset;
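The hunk above replaces the old signed-microsecond computation: a guest TSC write is now treated as a synchronization attempt when it lands within one second's worth of virtual TSC cycles of the expected value, and host-initiated writes of zero are always treated as vCPU initialization. A minimal userspace sketch of the same window test; nsec_to_cycles_model() and the other names are illustrative stand-ins, not the kernel's helpers.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified model of the one-second synchronization window used in
 * kvm_write_tsc(); a plain division replaces the kernel's scaled multiply. */
static uint64_t nsec_to_cycles_model(uint64_t tsc_khz, uint64_t ns)
{
	return tsc_khz * ns / 1000000ULL;       /* khz * ns / 1e6 = cycles */
}

static bool tsc_write_is_synchronizing(uint64_t tsc_khz,
				       uint64_t last_tsc_write,
				       uint64_t elapsed_ns,
				       uint64_t data)
{
	uint64_t tsc_exp = last_tsc_write + nsec_to_cycles_model(tsc_khz, elapsed_ns);
	uint64_t tsc_hz  = tsc_khz * 1000ULL;   /* one second in TSC cycles */

	/* |data - tsc_exp| < tsc_hz, written without signed arithmetic */
	return data < tsc_exp + tsc_hz && data + tsc_hz > tsc_exp;
}

int main(void)
{
	/* 2.5 GHz guest TSC, 10 ms of elapsed host time */
	uint64_t khz = 2500000, last = 1000000000ULL, elapsed_ns = 10000000ULL;
	uint64_t exp = last + nsec_to_cycles_model(khz, elapsed_ns);

	printf("within window:  %d\n",
	       tsc_write_is_synchronizing(khz, last, elapsed_ns, exp + 1000));
	printf("outside window: %d\n",
	       tsc_write_is_synchronizing(khz, last, elapsed_ns, exp + 3ULL * khz * 1000));
	return 0;
}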
@@ -1769,16 +1753,17 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 
        /* guest entries allowed */
        kvm_for_each_vcpu(i, vcpu, kvm)
-               clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
+               kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
 
        spin_unlock(&ka->pvclock_gtod_sync_lock);
 #endif
 }
 
-static u64 __get_kvmclock_ns(struct kvm *kvm)
+u64 get_kvmclock_ns(struct kvm *kvm)
 {
        struct kvm_arch *ka = &kvm->arch;
        struct pvclock_vcpu_time_info hv_clock;
+       u64 ret;
 
        spin_lock(&ka->pvclock_gtod_sync_lock);
        if (!ka->use_master_clock) {
@@ -1790,22 +1775,17 @@ static u64 __get_kvmclock_ns(struct kvm *kvm)
        hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
        spin_unlock(&ka->pvclock_gtod_sync_lock);
 
+       /* both __this_cpu_read() and rdtsc() should be on the same cpu */
+       get_cpu();
+
        kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
                           &hv_clock.tsc_shift,
                           &hv_clock.tsc_to_system_mul);
-       return __pvclock_read_cycles(&hv_clock, rdtsc());
-}
-
-u64 get_kvmclock_ns(struct kvm *kvm)
-{
-       unsigned long flags;
-       s64 ns;
+       ret = __pvclock_read_cycles(&hv_clock, rdtsc());
 
-       local_irq_save(flags);
-       ns = __get_kvmclock_ns(kvm);
-       local_irq_restore(flags);
+       put_cpu();
 
-       return ns;
+       return ret;
 }
 
 static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
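This is the change the subject line refers to: __this_cpu_read(cpu_tsc_khz) and rdtsc() must execute on the same CPU, and get_cpu()/put_cpu() guarantees that by disabling preemption, so the caller no longer has to run with interrupts off. A kernel-context sketch of the pattern; demo_tsc_khz and demo_read_tsc_and_khz() are invented names, not KVM symbols, and the snippet only builds inside a kernel tree.

#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/types.h>
#include <asm/msr.h>

DEFINE_PER_CPU(unsigned long, demo_tsc_khz);    /* illustrative per-CPU variable */

static u64 demo_read_tsc_and_khz(unsigned long *khz)
{
	u64 tsc;

	get_cpu();                      /* disable preemption: stay on this CPU */
	*khz = __this_cpu_read(demo_tsc_khz);
	tsc = rdtsc();                  /* sampled on the same CPU as the read above */
	put_cpu();                      /* preemption re-enabled */

	return tsc;
}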
@@ -1813,7 +1793,7 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
        struct kvm_vcpu_arch *vcpu = &v->arch;
        struct pvclock_vcpu_time_info guest_hv_clock;
 
-       if (unlikely(kvm_vcpu_read_guest_cached(v, &vcpu->pv_time,
+       if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
                &guest_hv_clock, sizeof(guest_hv_clock))))
                return;
 
@@ -1834,9 +1814,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
        BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
 
        vcpu->hv_clock.version = guest_hv_clock.version + 1;
-       kvm_vcpu_write_guest_cached(v, &vcpu->pv_time,
-                                   &vcpu->hv_clock,
-                                   sizeof(vcpu->hv_clock.version));
+       kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+                               &vcpu->hv_clock,
+                               sizeof(vcpu->hv_clock.version));
 
        smp_wmb();
 
@@ -1850,16 +1830,16 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
 
        trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
 
-       kvm_vcpu_write_guest_cached(v, &vcpu->pv_time,
-                                   &vcpu->hv_clock,
-                                   sizeof(vcpu->hv_clock));
+       kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+                               &vcpu->hv_clock,
+                               sizeof(vcpu->hv_clock));
 
        smp_wmb();
 
        vcpu->hv_clock.version++;
-       kvm_vcpu_write_guest_cached(v, &vcpu->pv_time,
-                                   &vcpu->hv_clock,
-                                   sizeof(vcpu->hv_clock.version));
+       kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+                               &vcpu->hv_clock,
+                               sizeof(vcpu->hv_clock.version));
 }
 
 static int kvm_guest_time_update(struct kvm_vcpu *v)
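The three cached writes above implement a seqcount-style publication: the version is bumped to an odd value before the payload is updated and back to an even value afterwards, with write barriers in between, so the guest can detect a torn read and retry. A simplified userspace model of that protocol; the demo_* names are illustrative and the real structure is pvclock_vcpu_time_info.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct demo_clock {
	_Atomic uint32_t version;
	_Atomic uint64_t system_time;
	_Atomic uint64_t tsc_timestamp;
};

/* host side: corresponds to the three kvm_write_guest_cached() calls;
 * assumes the stored version is currently even */
static void demo_publish(struct demo_clock *c, uint64_t st, uint64_t tsc)
{
	uint32_t v = atomic_load_explicit(&c->version, memory_order_relaxed);

	atomic_store_explicit(&c->version, v + 1, memory_order_relaxed); /* odd: update in flight */
	atomic_thread_fence(memory_order_release);                       /* ~ smp_wmb() */
	atomic_store_explicit(&c->system_time, st, memory_order_relaxed);
	atomic_store_explicit(&c->tsc_timestamp, tsc, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);                       /* ~ smp_wmb() */
	atomic_store_explicit(&c->version, v + 2, memory_order_relaxed); /* even: consistent */
}

/* guest side: retry while an update is in flight or the version changed */
static void demo_read(struct demo_clock *c, uint64_t *st, uint64_t *tsc)
{
	uint32_t v;

	do {
		v = atomic_load_explicit(&c->version, memory_order_acquire);
		*st  = atomic_load_explicit(&c->system_time, memory_order_relaxed);
		*tsc = atomic_load_explicit(&c->tsc_timestamp, memory_order_relaxed);
		atomic_thread_fence(memory_order_acquire);                /* ~ smp_rmb() */
	} while ((v & 1) || v != atomic_load_explicit(&c->version, memory_order_relaxed));
}

int main(void)
{
	struct demo_clock c = { 0 };
	uint64_t st, tsc;

	demo_publish(&c, 123456789ULL, 987654321ULL);
	demo_read(&c, &st, &tsc);
	printf("version=%u system_time=%llu\n",
	       (unsigned)atomic_load(&c.version), (unsigned long long)st);
	return 0;
}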
@@ -2092,7 +2072,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
                return 0;
        }
 
-       if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.apf.data, gpa,
+       if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
                                        sizeof(u32)))
                return 1;
 
@@ -2111,7 +2091,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
 
-       if (unlikely(kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.st.stime,
+       if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
                &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
                return;
 
@@ -2122,7 +2102,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 
        vcpu->arch.st.steal.version += 1;
 
-       kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime,
+       kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
                &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
 
        smp_wmb();
@@ -2131,14 +2111,14 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
                vcpu->arch.st.last_steal;
        vcpu->arch.st.last_steal = current->sched_info.run_delay;
 
-       kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime,
+       kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
                &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
 
        smp_wmb();
 
        vcpu->arch.st.steal.version += 1;
 
-       kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime,
+       kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
                &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
 }
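record_steal_time() and the other converted call sites go through a gfn_to_hva_cache, so the guest-physical address is translated once and reused until the memslots change. A toy model of what such a cache does; every demo_* name and the flat-buffer translation are made up for illustration, the real cache lives in include/linux/kvm_host.h.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct demo_hva_cache {
	uint64_t gpa;        /* guest physical address being cached       */
	void    *hva;        /* resolved host virtual address              */
	uint64_t generation; /* memslot generation the entry is valid for  */
};

static void *demo_gpa_to_hva(uint64_t gpa)
{
	/* stand-in translation: pretend guest memory is one flat buffer */
	static uint8_t guest_ram[1 << 20];
	return &guest_ram[gpa % sizeof(guest_ram)];
}

static uint64_t demo_memslot_generation(void)
{
	return 1;            /* never changes in this toy model */
}

static void *demo_cached_hva(struct demo_hva_cache *c, uint64_t gpa)
{
	uint64_t gen = demo_memslot_generation();

	if (c->hva == NULL || c->gpa != gpa || c->generation != gen) {
		c->gpa = gpa;
		c->hva = demo_gpa_to_hva(gpa);   /* slow path: re-resolve */
		c->generation = gen;
	}
	return c->hva;                            /* fast path: reuse */
}

int main(void)
{
	struct demo_hva_cache cache = { 0 };
	void *a = demo_cached_hva(&cache, 0x1000); /* first access resolves */
	void *b = demo_cached_hva(&cache, 0x1000); /* second access reuses  */

	printf("same hva: %d\n", a == b);
	return 0;
}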
 
@@ -2155,6 +2135,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_VM_HSAVE_PA:
        case MSR_AMD64_PATCH_LOADER:
        case MSR_AMD64_BU_CFG2:
+       case MSR_AMD64_DC_CFG:
                break;
 
        case MSR_EFER:
@@ -2230,8 +2211,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
 
                        if (ka->boot_vcpu_runs_old_kvmclock != tmp)
-                               set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
-                                       &vcpu->requests);
+                               kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
 
                        ka->boot_vcpu_runs_old_kvmclock = tmp;
                }
@@ -2243,7 +2223,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                if (!(data & 1))
                        break;
 
-               if (kvm_vcpu_gfn_to_hva_cache_init(vcpu,
+               if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
                     &vcpu->arch.pv_time, data & ~1ULL,
                     sizeof(struct pvclock_vcpu_time_info)))
                        vcpu->arch.pv_time_enabled = false;
@@ -2264,7 +2244,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                if (data & KVM_STEAL_RESERVED_MASK)
                        return 1;
 
-               if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.st.stime,
+               if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
                                                data & KVM_STEAL_VALID_BITS,
                                                sizeof(struct kvm_steal_time)))
                        return 1;
@@ -2331,6 +2311,21 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        return 1;
                vcpu->arch.osvw.status = data;
                break;
+       case MSR_PLATFORM_INFO:
+               if (!msr_info->host_initiated ||
+                   data & ~MSR_PLATFORM_INFO_CPUID_FAULT ||
+                   (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
+                    cpuid_fault_enabled(vcpu)))
+                       return 1;
+               vcpu->arch.msr_platform_info = data;
+               break;
+       case MSR_MISC_FEATURES_ENABLES:
+               if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
+                   (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
+                    !supports_cpuid_fault(vcpu)))
+                       return 1;
+               vcpu->arch.msr_misc_features_enables = data;
+               break;
        default:
                if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
                        return xen_hvm_config(vcpu, data);
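The two new MSRs gate CPUID faulting: MSR_PLATFORM_INFO may only be written by the host, only its CPUID-fault capability bit is accepted, and that bit cannot be withdrawn while the guest already has faulting enabled; MSR_MISC_FEATURES_ENABLES accepts the enable bit only when the capability is advertised. A standalone model of those two checks; the demo_* helpers are illustrative, the bit positions follow the Intel definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PLATFORM_INFO_CPUID_FAULT         (1ULL << 31)
#define MISC_FEATURES_ENABLES_CPUID_FAULT (1ULL << 0)

struct demo_vcpu {
	uint64_t msr_platform_info;
	uint64_t msr_misc_features_enables;
};

static bool demo_cpuid_fault_enabled(const struct demo_vcpu *v)
{
	return v->msr_misc_features_enables & MISC_FEATURES_ENABLES_CPUID_FAULT;
}

/* mirrors the MSR_PLATFORM_INFO case: host-only, one known bit, and the
 * capability cannot be withdrawn while the guest already uses it */
static bool demo_set_platform_info(struct demo_vcpu *v, uint64_t data, bool host_initiated)
{
	if (!host_initiated ||
	    (data & ~PLATFORM_INFO_CPUID_FAULT) ||
	    (!(data & PLATFORM_INFO_CPUID_FAULT) && demo_cpuid_fault_enabled(v)))
		return false;
	v->msr_platform_info = data;
	return true;
}

/* mirrors the MSR_MISC_FEATURES_ENABLES case: the enable bit is only
 * accepted when the platform-info MSR advertises the feature */
static bool demo_set_misc_features(struct demo_vcpu *v, uint64_t data)
{
	if ((data & ~MISC_FEATURES_ENABLES_CPUID_FAULT) ||
	    ((data & MISC_FEATURES_ENABLES_CPUID_FAULT) &&
	     !(v->msr_platform_info & PLATFORM_INFO_CPUID_FAULT)))
		return false;
	v->msr_misc_features_enables = data;
	return true;
}

int main(void)
{
	struct demo_vcpu v = { .msr_platform_info = PLATFORM_INFO_CPUID_FAULT };

	printf("%d\n", demo_set_misc_features(&v, MISC_FEATURES_ENABLES_CPUID_FAULT)); /* 1 */
	printf("%d\n", demo_set_platform_info(&v, 0, true)); /* 0: cannot withdraw while enabled */
	return 0;
}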
@@ -2417,6 +2412,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_FAM10H_MMIO_CONF_BASE:
        case MSR_AMD64_BU_CFG2:
        case MSR_IA32_PERF_CTL:
+       case MSR_AMD64_DC_CFG:
                msr_info->data = 0;
                break;
        case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
@@ -2545,6 +2541,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        return 1;
                msr_info->data = vcpu->arch.osvw.status;
                break;
+       case MSR_PLATFORM_INFO:
+               msr_info->data = vcpu->arch.msr_platform_info;
+               break;
+       case MSR_MISC_FEATURES_ENABLES:
+               msr_info->data = vcpu->arch.msr_misc_features_enables;
+               break;
        default:
                if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
                        return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
@@ -2675,15 +2677,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_SET_BOOT_CPU_ID:
        case KVM_CAP_SPLIT_IRQCHIP:
        case KVM_CAP_IMMEDIATE_EXIT:
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-       case KVM_CAP_ASSIGN_DEV_IRQ:
-       case KVM_CAP_PCI_2_3:
-#endif
                r = 1;
                break;
        case KVM_CAP_ADJUST_CLOCK:
                r = KVM_CLOCK_TSC_STABLE;
                break;
+       case KVM_CAP_X86_GUEST_MWAIT:
+               r = kvm_mwait_in_guest();
+               break;
        case KVM_CAP_X86_SMM:
                /* SMBASE is usually relocated above 1M on modern chipsets,
                 * and SMM handlers might indeed rely on 4G segment limits,
@@ -2695,9 +2696,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                 */
                r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
                break;
-       case KVM_CAP_COALESCED_MMIO:
-               r = KVM_COALESCED_MMIO_PAGE_OFFSET;
-               break;
        case KVM_CAP_VAPIC:
                r = !kvm_x86_ops->cpu_has_accelerated_tpr();
                break;
@@ -2713,11 +2711,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_PV_MMU:    /* obsolete */
                r = 0;
                break;
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-       case KVM_CAP_IOMMU:
-               r = iommu_present(&pci_bus_type);
-               break;
-#endif
        case KVM_CAP_MCE:
                r = KVM_MAX_MCE_BANKS;
                break;
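These r values are what userspace sees from KVM_CHECK_EXTENSION: KVM_CAP_ADJUST_CLOCK now reports KVM_CLOCK_TSC_STABLE and the new KVM_CAP_X86_GUEST_MWAIT reflects kvm_mwait_in_guest(). A short userspace probe, assuming a Linux host with /dev/kvm; the MWAIT capability is guarded by an #ifdef since older uapi headers do not define it.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);

	if (kvm < 0) {
		perror("/dev/kvm");
		return 1;
	}

	/* non-zero means the TSC-stable flag is supported for kvmclock */
	printf("KVM_CAP_ADJUST_CLOCK    = %d\n",
	       ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_ADJUST_CLOCK));
#ifdef KVM_CAP_X86_GUEST_MWAIT
	/* non-zero when MWAIT can be left unintercepted on this host */
	printf("KVM_CAP_X86_GUEST_MWAIT = %d\n",
	       ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_X86_GUEST_MWAIT));
#endif
	close(kvm);
	return 0;
}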
@@ -2816,11 +2809,6 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
        return kvm_arch_has_noncoherent_dma(vcpu->kvm);
 }
 
-static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
-{
-       set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        /* Address WBINVD may be executed by guest */
@@ -2864,7 +2852,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
                        kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
                if (vcpu->cpu != cpu)
-                       kvm_migrate_timers(vcpu);
+                       kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
                vcpu->cpu = cpu;
        }
 
@@ -2878,7 +2866,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 
        vcpu->arch.st.steal.preempted = 1;
 
-       kvm_vcpu_write_guest_offset_cached(vcpu, &vcpu->arch.st.stime,
+       kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
                        &vcpu->arch.st.steal.preempted,
                        offsetof(struct kvm_steal_time, preempted),
                        sizeof(vcpu->arch.st.steal.preempted));
@@ -3124,7 +3112,14 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
                return -EINVAL;
 
        if (events->exception.injected &&
-           (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
+           (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR ||
+            is_guest_mode(vcpu)))
+               return -EINVAL;
+
+       /* INITs are latched while in SMM */
+       if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
+           (events->smi.smm || events->smi.pending) &&
+           vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
                return -EINVAL;
 
        process_nmi(vcpu);
@@ -3301,11 +3296,14 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
        }
 }
 
+#define XSAVE_MXCSR_OFFSET 24
+
 static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
                                        struct kvm_xsave *guest_xsave)
 {
        u64 xstate_bv =
                *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
+       u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
 
        if (boot_cpu_has(X86_FEATURE_XSAVE)) {
                /*
@@ -3313,11 +3311,13 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
                 * CPUID leaf 0xD, index 0, EDX:EAX.  This is for compatibility
                 * with old userspace.
                 */
-               if (xstate_bv & ~kvm_supported_xcr0())
+               if (xstate_bv & ~kvm_supported_xcr0() ||
+                       mxcsr & ~mxcsr_feature_mask)
                        return -EINVAL;
                load_xsave(vcpu, (u8 *)guest_xsave->region);
        } else {
-               if (xstate_bv & ~XFEATURE_MASK_FPSSE)
+               if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
+                       mxcsr & ~mxcsr_feature_mask)
                        return -EINVAL;
                memcpy(&vcpu->arch.guest_fpu.state.fxsave,
                        guest_xsave->region, sizeof(struct fxregs_state));
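MXCSR sits at byte offset 24 of the legacy FXSAVE/XSAVE area, so the reserved-bit check can be applied to the raw image before it is loaded into the vCPU. A standalone illustration of that check; 0xffbf is used only as an example value for mxcsr_feature_mask, and the demo_* names are invented.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DEMO_XSAVE_MXCSR_OFFSET 24

static bool demo_xsave_mxcsr_valid(const uint8_t *xsave_image, uint32_t feature_mask)
{
	uint32_t mxcsr;

	memcpy(&mxcsr, xsave_image + DEMO_XSAVE_MXCSR_OFFSET, sizeof(mxcsr));
	return (mxcsr & ~feature_mask) == 0;   /* any reserved bit set => reject */
}

int main(void)
{
	uint8_t image[512] = { 0 };
	uint32_t mask = 0xffbf;                /* example feature mask */
	uint32_t good = 0x1f80, bad = 0x80000000;

	memcpy(image + DEMO_XSAVE_MXCSR_OFFSET, &good, sizeof(good));
	printf("default MXCSR accepted: %d\n", demo_xsave_mxcsr_valid(image, mask));
	memcpy(image + DEMO_XSAVE_MXCSR_OFFSET, &bad, sizeof(bad));
	printf("reserved bit accepted:  %d\n", demo_xsave_mxcsr_valid(image, mask));
	return 0;
}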
@@ -3721,22 +3721,21 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
 
 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 {
+       struct kvm_pic *pic = kvm->arch.vpic;
        int r;
 
        r = 0;
        switch (chip->chip_id) {
        case KVM_IRQCHIP_PIC_MASTER:
-               memcpy(&chip->chip.pic,
-                       &pic_irqchip(kvm)->pics[0],
+               memcpy(&chip->chip.pic, &pic->pics[0],
                        sizeof(struct kvm_pic_state));
                break;
        case KVM_IRQCHIP_PIC_SLAVE:
-               memcpy(&chip->chip.pic,
-                       &pic_irqchip(kvm)->pics[1],
+               memcpy(&chip->chip.pic, &pic->pics[1],
                        sizeof(struct kvm_pic_state));
                break;
        case KVM_IRQCHIP_IOAPIC:
-               r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
+               kvm_get_ioapic(kvm, &chip->chip.ioapic);
                break;
        default:
                r = -EINVAL;
@@ -3747,32 +3746,31 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 
 static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 {
+       struct kvm_pic *pic = kvm->arch.vpic;
        int r;
 
        r = 0;
        switch (chip->chip_id) {
        case KVM_IRQCHIP_PIC_MASTER:
-               spin_lock(&pic_irqchip(kvm)->lock);
-               memcpy(&pic_irqchip(kvm)->pics[0],
-                       &chip->chip.pic,
+               spin_lock(&pic->lock);
+               memcpy(&pic->pics[0], &chip->chip.pic,
                        sizeof(struct kvm_pic_state));
-               spin_unlock(&pic_irqchip(kvm)->lock);
+               spin_unlock(&pic->lock);
                break;
        case KVM_IRQCHIP_PIC_SLAVE:
-               spin_lock(&pic_irqchip(kvm)->lock);
-               memcpy(&pic_irqchip(kvm)->pics[1],
-                       &chip->chip.pic,
+               spin_lock(&pic->lock);
+               memcpy(&pic->pics[1], &chip->chip.pic,
                        sizeof(struct kvm_pic_state));
-               spin_unlock(&pic_irqchip(kvm)->lock);
+               spin_unlock(&pic->lock);
                break;
        case KVM_IRQCHIP_IOAPIC:
-               r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
+               kvm_set_ioapic(kvm, &chip->chip.ioapic);
                break;
        default:
                r = -EINVAL;
                break;
        }
-       kvm_pic_update_irq(pic_irqchip(kvm));
+       kvm_pic_update_irq(pic);
        return r;
 }
 
@@ -4018,20 +4016,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
                r = kvm_ioapic_init(kvm);
                if (r) {
-                       mutex_lock(&kvm->slots_lock);
                        kvm_pic_destroy(kvm);
-                       mutex_unlock(&kvm->slots_lock);
                        goto create_irqchip_unlock;
                }
 
                r = kvm_setup_default_irq_routing(kvm);
                if (r) {
-                       mutex_lock(&kvm->slots_lock);
-                       mutex_lock(&kvm->irq_lock);
                        kvm_ioapic_destroy(kvm);
                        kvm_pic_destroy(kvm);
-                       mutex_unlock(&kvm->irq_lock);
-                       mutex_unlock(&kvm->slots_lock);
                        goto create_irqchip_unlock;
                }
                /* Write kvm->irq_routing before enabling irqchip_in_kernel. */
@@ -4196,10 +4188,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        goto out;
 
                r = 0;
-               local_irq_disable();
-               now_ns = __get_kvmclock_ns(kvm);
+               now_ns = get_kvmclock_ns(kvm);
                kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
-               local_irq_enable();
                kvm_gen_update_masterclock(kvm);
                break;
        }
@@ -4207,11 +4197,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
                struct kvm_clock_data user_ns;
                u64 now_ns;
 
-               local_irq_disable();
-               now_ns = __get_kvmclock_ns(kvm);
+               now_ns = get_kvmclock_ns(kvm);
                user_ns.clock = now_ns;
                user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
-               local_irq_enable();
                memset(&user_ns.pad, 0, sizeof(user_ns.pad));
 
                r = -EFAULT;
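With get_kvmclock_ns() handling its own CPU pinning, KVM_GET_CLOCK and KVM_SET_CLOCK no longer run with interrupts disabled. From userspace the two ioctls look like this; a sketch only, with minimal error handling.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

int main(void)
{
	struct kvm_clock_data clock;
	int kvm, vm;

	kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	if (kvm < 0) {
		perror("/dev/kvm");
		return 1;
	}
	vm = ioctl(kvm, KVM_CREATE_VM, 0);
	if (vm < 0) {
		perror("KVM_CREATE_VM");
		return 1;
	}

	memset(&clock, 0, sizeof(clock));
	if (ioctl(vm, KVM_GET_CLOCK, &clock) == 0)
		printf("kvmclock: %llu ns, flags 0x%x\n",
		       (unsigned long long)clock.clock, clock.flags);

	/* advance the guest clock by one millisecond */
	clock.clock += 1000000ULL;
	clock.flags = 0;
	if (ioctl(vm, KVM_SET_CLOCK, &clock) != 0)
		perror("KVM_SET_CLOCK");

	close(vm);
	close(kvm);
	return 0;
}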
@@ -4230,7 +4218,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
                break;
        }
        default:
-               r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
+               r = -ENOTTY;
        }
 out:
        return r;
@@ -5223,6 +5211,16 @@ static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
        kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked);
 }
 
+static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
+{
+       return emul_to_vcpu(ctxt)->arch.hflags;
+}
+
+static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
+{
+       kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags);
+}
+
 static const struct x86_emulate_ops emulate_ops = {
        .read_gpr            = emulator_read_gpr,
        .write_gpr           = emulator_write_gpr,
@@ -5262,6 +5260,8 @@ static const struct x86_emulate_ops emulate_ops = {
        .intercept           = emulator_intercept,
        .get_cpuid           = emulator_get_cpuid,
        .set_nmi_mask        = emulator_set_nmi_mask,
+       .get_hflags          = emulator_get_hflags,
+       .set_hflags          = emulator_set_hflags,
 };
 
 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
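Instead of snapshotting vcpu->arch.hflags into ctxt->emul_flags and writing it back after emulation (both removed further down), the emulator now reads and updates the flags through these two callbacks, so the vCPU copy cannot go stale. A minimal illustration of that accessor pattern; the demo_* names and the flag value are invented, not the KVM definitions.

#include <stdio.h>

struct demo_vcpu { unsigned hflags; };

struct demo_emul_ops {
	unsigned (*get_hflags)(struct demo_vcpu *v);
	void     (*set_hflags)(struct demo_vcpu *v, unsigned flags);
};

static unsigned demo_get_hflags(struct demo_vcpu *v)         { return v->hflags; }
static void demo_set_hflags(struct demo_vcpu *v, unsigned f) { v->hflags = f; }

static const struct demo_emul_ops demo_ops = {
	.get_hflags = demo_get_hflags,
	.set_hflags = demo_set_hflags,
};

#define DEMO_HF_SMM (1u << 1)   /* illustrative "in SMM" flag */

/* e.g. an emulated RSM clears the SMM flag directly on the vCPU */
static void demo_emulate_rsm(const struct demo_emul_ops *ops, struct demo_vcpu *v)
{
	ops->set_hflags(v, ops->get_hflags(v) & ~DEMO_HF_SMM);
}

int main(void)
{
	struct demo_vcpu v = { .hflags = DEMO_HF_SMM };

	demo_emulate_rsm(&demo_ops, &v);
	printf("hflags after RSM: %#x\n", v.hflags);
	return 0;
}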
@@ -5314,7 +5314,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
        BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
        BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
        BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
-       ctxt->emul_flags = vcpu->arch.hflags;
 
        init_decode_cache(ctxt);
        vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
@@ -5718,8 +5717,6 @@ restart:
                unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
                toggle_interruptibility(vcpu, ctxt->interruptibility);
                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
-               if (vcpu->arch.hflags != ctxt->emul_flags)
-                       kvm_set_hflags(vcpu, ctxt->emul_flags);
                kvm_rip_write(vcpu, ctxt->eip);
                if (r == EMULATE_DONE)
                        kvm_vcpu_check_singlestep(vcpu, rflags, &r);
@@ -6869,7 +6866,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
        /*
         * 1) We should set ->mode before checking ->requests.  Please see
-        * the comment in kvm_make_all_cpus_request.
+        * the comment in kvm_vcpu_exiting_guest_mode().
         *
         * 2) For APICv, we should set ->mode before checking PIR.ON.  This
         * pairs with the memory barrier implicit in pi_test_and_set_on
@@ -7051,7 +7048,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
                if (r <= 0)
                        break;
 
-               clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
+               kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
                if (kvm_cpu_has_pending_timer(vcpu))
                        kvm_inject_pending_timer_irqs(vcpu);
 
@@ -7179,7 +7176,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
                kvm_vcpu_block(vcpu);
                kvm_apic_accept_events(vcpu);
-               clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+               kvm_clear_request(KVM_REQ_UNHALT, vcpu);
                r = -EAGAIN;
                goto out;
        }
@@ -7355,6 +7352,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
            mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
                return -EINVAL;
 
+       /* INITs are latched while in SMM */
+       if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
+           (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
+            mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
+               return -EINVAL;
+
        if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
                vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
                set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
@@ -7724,6 +7727,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        if (!init_event) {
                kvm_pmu_reset(vcpu);
                vcpu->arch.smbase = 0x30000;
+
+               vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
+               vcpu->arch.msr_misc_features_enables = 0;
        }
 
        memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
@@ -8068,7 +8074,6 @@ void kvm_arch_sync_events(struct kvm *kvm)
 {
        cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
        cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
-       kvm_free_all_assigned_devices(kvm);
        kvm_free_pit(kvm);
 }
 
@@ -8152,7 +8157,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
        }
        if (kvm_x86_ops->vm_destroy)
                kvm_x86_ops->vm_destroy(kvm);
-       kvm_iommu_unmap_guest(kvm);
        kvm_pic_destroy(kvm);
        kvm_ioapic_destroy(kvm);
        kvm_free_vcpus(kvm);
@@ -8199,13 +8203,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                                      slot->base_gfn, level) + 1;
 
                slot->arch.rmap[i] =
-                       kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
+                       kvzalloc(lpages * sizeof(*slot->arch.rmap[i]), GFP_KERNEL);
                if (!slot->arch.rmap[i])
                        goto out_free;
                if (i == 0)
                        continue;
 
-               linfo = kvm_kvzalloc(lpages * sizeof(*linfo));
+               linfo = kvzalloc(lpages * sizeof(*linfo), GFP_KERNEL);
                if (!linfo)
                        goto out_free;
 
@@ -8385,7 +8389,7 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
        if (atomic_read(&vcpu->arch.nmi_queued))
                return true;
 
-       if (test_bit(KVM_REQ_SMI, &vcpu->requests))
+       if (kvm_test_request(KVM_REQ_SMI, vcpu))
                return true;
 
        if (kvm_arch_interrupt_allowed(vcpu) &&
@@ -8536,8 +8540,9 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
 
 static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
 {
-       return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apf.data, &val,
-                                          sizeof(val));
+
+       return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
+                                     sizeof(val));
 }
 
 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,