Merge tag 'kvm-s390-20140825' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms39...
author Paolo Bonzini <pbonzini@redhat.com>
Mon, 25 Aug 2014 13:37:00 +0000 (15:37 +0200)
committer Paolo Bonzini <pbonzini@redhat.com>
Mon, 25 Aug 2014 13:37:00 +0000 (15:37 +0200)
Here are two fixes for the s390 KVM code that prevent:
1. a malicious user from triggering a kernel BUG
2. a malicious user from changing the storage key of read-only pages

18 files changed:
arch/arm/kvm/arm.c
arch/mips/kvm/mips.c
arch/powerpc/kvm/book3s_hv_builtin.c
arch/powerpc/kvm/powerpc.c
arch/s390/kvm/kvm-s390.c
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/emulate.c
arch/x86/kvm/lapic.c
arch/x86/kvm/pmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/trace.h
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
include/linux/kvm_host.h
virt/kvm/assigned-dev.c
virt/kvm/iommu.c
virt/kvm/kvm_main.c

index a99e0cdf8ba2f3c1799b3a7c2013f8e024b8056e..9f788ebac55bec7222b703bcb8f258c26c9334bc 100644 (file)
@@ -288,6 +288,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        vcpu->cpu = cpu;
index cd7114147ae777f9a5bf7063acfd0cbd388cd944..2362df2a79f9327c054a1d599011851f7cdd904b 100644 (file)
@@ -1002,6 +1002,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                  struct kvm_translation *tr)
 {
index 329d7fdd0a6ab7be8b9e203ac53b1870c1d37b0c..b9615ba5b083a6ddeea76b878925396561b5fb42 100644 (file)
@@ -101,7 +101,7 @@ struct kvm_rma_info *kvm_alloc_rma()
        ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
        if (!ri)
                return NULL;
-       page = cma_alloc(kvm_cma, kvm_rma_pages, get_order(kvm_rma_pages));
+       page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages));
        if (!page)
                goto err_out;
        atomic_set(&ri->use_count, 1);
@@ -135,12 +135,12 @@ struct page *kvm_alloc_hpt(unsigned long nr_pages)
 {
        unsigned long align_pages = HPT_ALIGN_PAGES;
 
-       VM_BUG_ON(get_order(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+       VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
        /* Old CPUs require HPT aligned on a multiple of its size */
        if (!cpu_has_feature(CPU_FTR_ARCH_206))
                align_pages = nr_pages;
-       return cma_alloc(kvm_cma, nr_pages, get_order(align_pages));
+       return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages));
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
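
The two hunks above swap get_order() for order_base_2() because cma_alloc() takes its alignment as an order of *pages*, while get_order() converts a *byte* count into a page order; feeding it a page count yields an order that is PAGE_SHIFT too small. A minimal userspace sketch of the difference, with hypothetical reimplementations of both helpers (not the kernel versions):

#include <stdio.h>

#define PAGE_SHIFT	12UL
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

/* ceil(log2(n)), like order_base_2() from include/linux/log2.h */
static unsigned int order_base_2(unsigned long n)
{
	unsigned int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

/* page order needed to hold `size` BYTES, like get_order() */
static unsigned int get_order(unsigned long size)
{
	return order_base_2((size + PAGE_SIZE - 1) >> PAGE_SHIFT);
}

int main(void)
{
	unsigned long kvm_rma_pages = 1UL << 14;	/* assumed page count */

	/* cma_alloc(cma, nr_pages, align_order) wants an order of pages */
	printf("order_base_2(pages) = %u\n", order_base_2(kvm_rma_pages)); /* 14 */
	printf("get_order(pages)    = %u\n", get_order(kvm_rma_pages));    /*  2 */
	return 0;
}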
 
index 4c79284b58be9d0870eebbf62a772dbdf701598c..cbc432f4f0a6579a2d453d15adde1aac00f102fe 100644 (file)
@@ -720,6 +720,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
        kvmppc_subarch_vcpu_uninit(vcpu);
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 #ifdef CONFIG_BOOKE
index 81b0e11521e444501ff5b1fc723965374ea3bd7e..197bec03d9190b6b16e408e0f85793f5263093bc 100644 (file)
@@ -555,6 +555,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
        /* Nothing todo */
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
index 572460175ba509d9317e408e96975cf10780eef7..ac0f90e26a0b0312521fabf07c66fceb3031bd2a 100644 (file)
@@ -95,7 +95,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
 #define KVM_REFILL_PAGES 25
 #define KVM_MAX_CPUID_ENTRIES 80
 #define KVM_NR_FIXED_MTRR_REGION 88
-#define KVM_NR_VAR_MTRR 10
+#define KVM_NR_VAR_MTRR 8
 
 #define ASYNC_PF_PER_VCPU 64
 
@@ -710,7 +710,6 @@ struct kvm_x86_ops {
        void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
        unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
        void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
-       void (*fpu_activate)(struct kvm_vcpu *vcpu);
        void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
 
        void (*tlb_flush)(struct kvm_vcpu *vcpu);
@@ -772,6 +771,8 @@ struct kvm_x86_ops {
        bool (*mpx_supported)(void);
 
        int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
+
+       void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
 };
 
 struct kvm_arch_async_pf {
index 38a0afe83c6ba17822ca683ae570fec7ff65825e..f4bad87ef256533473650a57b250d0d1924ee164 100644 (file)
@@ -112,8 +112,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
                        break;
                }
        }
-       if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
-               entry->edx &= ~(1 << 20);
+       if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) {
+               entry->edx &= ~bit(X86_FEATURE_NX);
                printk(KERN_INFO "kvm: guest NX capability removed\n");
        }
 }
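
The bit() helper replaces the magic 1 << 20: X86_FEATURE_NX is defined as word 1, bit 20 of the cpufeature table (1*32 + 20), and KVM's bit() helper in arch/x86/kvm/x86.h masks off the word index, so bit(X86_FEATURE_NX) expands to the same value. A small standalone sketch:

#include <stdio.h>
#include <stdint.h>

#define X86_FEATURE_NX	(1*32 + 20)	/* word 1, bit 20 of the feature table */

/* same shape as KVM's helper in arch/x86/kvm/x86.h */
static inline uint32_t bit(int bitno)
{
	return 1U << (bitno & 31);
}

int main(void)
{
	printf("bit(X86_FEATURE_NX) = 0x%x\n", bit(X86_FEATURE_NX));	/* 0x100000 */
	return 0;
}
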
index 56657b0bb3bb14f14b76fdcd99a746598ce5e8b4..e5bf13003cd215def5ae6973480878b1352c09f3 100644 (file)
@@ -527,6 +527,7 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
                             u32 error, bool valid)
 {
+       WARN_ON(vec > 0x1f);
        ctxt->exception.vector = vec;
        ctxt->exception.error_code = error;
        ctxt->exception.error_code_valid = valid;
@@ -1468,7 +1469,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                return ret;
 
        err_code = selector & 0xfffc;
-       err_vec = GP_VECTOR;
+       err_vec = in_task_switch ? TS_VECTOR : GP_VECTOR;
 
        /* can't load system descriptor into segment selector */
        if (seg <= VCPU_SREG_GS && !seg_desc.s)
@@ -1491,9 +1492,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                        goto exception;
                break;
        case VCPU_SREG_CS:
-               if (in_task_switch && rpl != dpl)
-                       goto exception;
-
                if (!(seg_desc.type & 8))
                        goto exception;
 
@@ -1552,8 +1550,7 @@ load:
        ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
        return X86EMUL_CONTINUE;
 exception:
-       emulate_exception(ctxt, err_vec, err_code, true);
-       return X86EMUL_PROPAGATE_FAULT;
+       return emulate_exception(ctxt, err_vec, err_code, true);
 }
 
 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
@@ -2726,8 +2723,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
        if (!next_tss_desc.p ||
            ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
             desc_limit < 0x2b)) {
-               emulate_ts(ctxt, tss_selector & 0xfffc);
-               return X86EMUL_PROPAGATE_FAULT;
+               return emulate_ts(ctxt, tss_selector & 0xfffc);
        }
 
        if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
@@ -3019,7 +3015,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
                ctxt->dst.val = swab64(ctxt->src.val);
                break;
        default:
-               return X86EMUL_PROPAGATE_FAULT;
+               BUG();
        }
        return X86EMUL_CONTINUE;
 }
@@ -4394,8 +4390,11 @@ done_prefixes:
 
        ctxt->execute = opcode.u.execute;
 
+       if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
+               return EMULATION_FAILED;
+
        if (unlikely(ctxt->d &
-                    (NotImpl|EmulateOnUD|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
+                    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
                /*
                 * These are copied unconditionally here, and checked unconditionally
                 * in x86_emulate_insn.
@@ -4406,9 +4405,6 @@ done_prefixes:
                if (ctxt->d & NotImpl)
                        return EMULATION_FAILED;
 
-               if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
-                       return EMULATION_FAILED;
-
                if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
                        ctxt->op_bytes = 8;
 
@@ -4832,8 +4828,10 @@ writeback:
        ctxt->eip = ctxt->_eip;
 
 done:
-       if (rc == X86EMUL_PROPAGATE_FAULT)
+       if (rc == X86EMUL_PROPAGATE_FAULT) {
+               WARN_ON(ctxt->exception.vector > 0x1f);
                ctxt->have_exception = true;
+       }
        if (rc == X86EMUL_INTERCEPTED)
                return EMULATION_INTERCEPTED;
 
index 08e8a899e005be109bc0fec7f342968179b80cc1..fb919c574e2306926629e565a1ec8a6286fd3b3b 100644 (file)
@@ -112,17 +112,6 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
 struct static_key_deferred apic_hw_disabled __read_mostly;
 struct static_key_deferred apic_sw_disabled __read_mostly;
 
-static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
-{
-       if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
-               if (val & APIC_SPIV_APIC_ENABLED)
-                       static_key_slow_dec_deferred(&apic_sw_disabled);
-               else
-                       static_key_slow_inc(&apic_sw_disabled.key);
-       }
-       apic_set_reg(apic, APIC_SPIV, val);
-}
-
 static inline int apic_enabled(struct kvm_lapic *apic)
 {
        return kvm_apic_sw_enabled(apic) &&     kvm_apic_hw_enabled(apic);
@@ -210,6 +199,20 @@ out:
        kvm_vcpu_request_scan_ioapic(kvm);
 }
 
+static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
+{
+       u32 prev = kvm_apic_get_reg(apic, APIC_SPIV);
+
+       apic_set_reg(apic, APIC_SPIV, val);
+       if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) {
+               if (val & APIC_SPIV_APIC_ENABLED) {
+                       static_key_slow_dec_deferred(&apic_sw_disabled);
+                       recalculate_apic_map(apic->vcpu->kvm);
+               } else
+                       static_key_slow_inc(&apic_sw_disabled.key);
+       }
+}
+
 static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
 {
        apic_set_reg(apic, APIC_ID, id << 24);
@@ -1352,6 +1355,9 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
                return;
 
        hrtimer_cancel(&apic->lapic_timer.timer);
+       /* Inject here so clearing tscdeadline won't override new value */
+       if (apic_has_pending_timer(vcpu))
+               kvm_inject_apic_timer_irqs(vcpu);
        apic->lapic_timer.tscdeadline = data;
        start_apic_timer(apic);
 }
@@ -1639,6 +1645,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 
        if (atomic_read(&apic->lapic_timer.pending) > 0) {
                kvm_apic_local_deliver(apic, APIC_LVTT);
+               if (apic_lvtt_tscdeadline(apic))
+                       apic->lapic_timer.tscdeadline = 0;
                atomic_set(&apic->lapic_timer.pending, 0);
        }
 }
index 3dd6accb64ec130d28db81f501f95aa7726c4002..8e6b7d869d2f7f34432a1f1685606eab7945561a 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/types.h>
 #include <linux/kvm_host.h>
 #include <linux/perf_event.h>
+#include <asm/perf_event.h>
 #include "x86.h"
 #include "cpuid.h"
 #include "lapic.h"
@@ -463,7 +464,8 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
 {
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        struct kvm_cpuid_entry2 *entry;
-       unsigned bitmap_len;
+       union cpuid10_eax eax;
+       union cpuid10_edx edx;
 
        pmu->nr_arch_gp_counters = 0;
        pmu->nr_arch_fixed_counters = 0;
@@ -475,25 +477,27 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
        entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
        if (!entry)
                return;
+       eax.full = entry->eax;
+       edx.full = entry->edx;
 
-       pmu->version = entry->eax & 0xff;
+       pmu->version = eax.split.version_id;
        if (!pmu->version)
                return;
 
-       pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff,
-                       INTEL_PMC_MAX_GENERIC);
-       pmu->counter_bitmask[KVM_PMC_GP] =
-               ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1;
-       bitmap_len = (entry->eax >> 24) & 0xff;
-       pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1);
+       pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
+                                       INTEL_PMC_MAX_GENERIC);
+       pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
+       pmu->available_event_types = ~entry->ebx &
+                                       ((1ull << eax.split.mask_length) - 1);
 
        if (pmu->version == 1) {
                pmu->nr_arch_fixed_counters = 0;
        } else {
-               pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
+               pmu->nr_arch_fixed_counters =
+                       min_t(int, edx.split.num_counters_fixed,
                                INTEL_PMC_MAX_FIXED);
                pmu->counter_bitmask[KVM_PMC_FIXED] =
-                       ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
+                       ((u64)1 << edx.split.bit_width_fixed) - 1;
        }
 
        pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
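
The unions give names to the same bit ranges the removed shifts extracted; the field layout below mirrors union cpuid10_eax/cpuid10_edx from arch/x86/include/asm/perf_event.h. A hedged sketch decoding an assumed sample CPUID.0xA value:

#include <stdio.h>

union cpuid10_eax {
	struct {
		unsigned int version_id:8;	/* was:  eax        & 0xff */
		unsigned int num_counters:8;	/* was: (eax >>  8) & 0xff */
		unsigned int bit_width:8;	/* was: (eax >> 16) & 0xff */
		unsigned int mask_length:8;	/* was: (eax >> 24) & 0xff */
	} split;
	unsigned int full;
};

union cpuid10_edx {
	struct {
		unsigned int num_counters_fixed:5;	/* was:  edx       & 0x1f */
		unsigned int bit_width_fixed:8;		/* was: (edx >> 5) & 0xff */
		unsigned int reserved:19;
	} split;
	unsigned int full;
};

int main(void)
{
	union cpuid10_eax eax = { .full = 0x07300403 };	/* assumed sample value */

	printf("PMU v%u: %u GP counters, %u bits wide, mask length %u\n",
	       eax.split.version_id, eax.split.num_counters,
	       eax.split.bit_width, eax.split.mask_length);
	return 0;
}
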
index ddf742768ecf2f823b7e8462a8ccda2c45225f2f..1703aab84a6d0108672c8d8f020ff46111a99924 100644 (file)
@@ -4305,6 +4305,10 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
        local_irq_enable();
 }
 
+static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
        .cpu_has_kvm_support = has_svm,
        .disabled_by_bios = is_disabled,
@@ -4349,7 +4353,6 @@ static struct kvm_x86_ops svm_x86_ops = {
        .cache_reg = svm_cache_reg,
        .get_rflags = svm_get_rflags,
        .set_rflags = svm_set_rflags,
-       .fpu_activate = svm_fpu_activate,
        .fpu_deactivate = svm_fpu_deactivate,
 
        .tlb_flush = svm_flush_tlb,
@@ -4406,6 +4409,8 @@ static struct kvm_x86_ops svm_x86_ops = {
 
        .check_intercept = svm_check_intercept,
        .handle_external_intr = svm_handle_external_intr,
+
+       .sched_in = svm_sched_in,
 };
 
 static int __init svm_init(void)
index e850a7d332be32a0f8c736bc0dced26b213b4b2d..1742dfbd26b30f6f3fb5ad0c7c77f4cb220d7c63 100644 (file)
@@ -848,6 +848,36 @@ TRACE_EVENT(kvm_track_tsc,
                  __print_symbolic(__entry->host_clock, host_clocks))
 );
 
+TRACE_EVENT(kvm_ple_window,
+       TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
+       TP_ARGS(grow, vcpu_id, new, old),
+
+       TP_STRUCT__entry(
+               __field(                bool,      grow         )
+               __field(        unsigned int,   vcpu_id         )
+               __field(                 int,       new         )
+               __field(                 int,       old         )
+       ),
+
+       TP_fast_assign(
+               __entry->grow           = grow;
+               __entry->vcpu_id        = vcpu_id;
+               __entry->new            = new;
+               __entry->old            = old;
+       ),
+
+       TP_printk("vcpu %u: ple_window %d (%s %d)",
+                 __entry->vcpu_id,
+                 __entry->new,
+                 __entry->grow ? "grow" : "shrink",
+                 __entry->old)
+);
+
+#define trace_kvm_ple_window_grow(vcpu_id, new, old) \
+       trace_kvm_ple_window(true, vcpu_id, new, old)
+#define trace_kvm_ple_window_shrink(vcpu_id, new, old) \
+       trace_kvm_ple_window(false, vcpu_id, new, old)
+
 #endif /* CONFIG_X86_64 */
 
 #endif /* _TRACE_KVM_H */
index bfe11cf124a1ea26cfa8513ceac83d450e13c7ef..661abc2f7049b136c85d023202a80542e566c258 100644 (file)
@@ -125,14 +125,32 @@ module_param(nested, bool, S_IRUGO);
  * Time is measured based on a counter that runs at the same rate as the TSC,
  * refer SDM volume 3b section 21.6.13 & 22.1.3.
  */
-#define KVM_VMX_DEFAULT_PLE_GAP    128
-#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
+#define KVM_VMX_DEFAULT_PLE_GAP           128
+#define KVM_VMX_DEFAULT_PLE_WINDOW        4096
+#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW   2
+#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
+#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX    \
+               INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
+
 static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
 module_param(ple_gap, int, S_IRUGO);
 
 static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
 module_param(ple_window, int, S_IRUGO);
 
+/* Default doubles per-vcpu window every exit. */
+static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
+module_param(ple_window_grow, int, S_IRUGO);
+
+/* Default resets per-vcpu window every exit to ple_window. */
+static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
+module_param(ple_window_shrink, int, S_IRUGO);
+
+/* Default is to compute the maximum so we can never overflow. */
+static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+static int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+module_param(ple_window_max, int, S_IRUGO);
+
 extern const ulong vmx_return;
 
 #define NR_AUTOLOAD_MSRS 8
@@ -484,6 +502,10 @@ struct vcpu_vmx {
 
        /* Support for a guest hypervisor (nested VMX) */
        struct nested_vmx nested;
+
+       /* Dynamic PLE window. */
+       int ple_window;
+       bool ple_window_dirty;
 };
 
 enum segment_cache_field {
@@ -4402,7 +4424,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
        if (ple_gap) {
                vmcs_write32(PLE_GAP, ple_gap);
-               vmcs_write32(PLE_WINDOW, ple_window);
+               vmx->ple_window = ple_window;
+               vmx->ple_window_dirty = true;
        }
 
        vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
@@ -5521,17 +5544,18 @@ static u64 ept_rsvd_mask(u64 spte, int level)
        for (i = 51; i > boot_cpu_data.x86_phys_bits; i--)
                mask |= (1ULL << i);
 
-       if (level > 2)
+       if (level == 4)
                /* bits 7:3 reserved */
                mask |= 0xf8;
-       else if (level == 2) {
-               if (spte & (1ULL << 7))
-                       /* 2MB ref, bits 20:12 reserved */
-                       mask |= 0x1ff000;
-               else
-                       /* bits 6:3 reserved */
-                       mask |= 0x78;
-       }
+       else if (spte & (1ULL << 7))
+               /*
+                * 1GB/2MB page, bits 29:12 or 20:12 reserved respectively,
+                * level == 1 if the hypervisor is using the ignored bit 7.
+                */
+               mask |= (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE;
+       else if (level > 1)
+               /* bits 6:3 reserved */
+               mask |= 0x78;
 
        return mask;
 }
@@ -5561,7 +5585,8 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
                        WARN_ON(1);
                }
 
-               if (level == 1 || (level == 2 && (spte & (1ULL << 7)))) {
+               /* bits 5:3 are _not_ reserved for large page or leaf page */
+               if ((rsvd_bits & 0x38) == 0) {
                        u64 ept_mem_type = (spte & 0x38) >> 3;
 
                        if (ept_mem_type == 2 || ept_mem_type == 3 ||
@@ -5676,12 +5701,85 @@ out:
        return ret;
 }
 
+static int __grow_ple_window(int val)
+{
+       if (ple_window_grow < 1)
+               return ple_window;
+
+       val = min(val, ple_window_actual_max);
+
+       if (ple_window_grow < ple_window)
+               val *= ple_window_grow;
+       else
+               val += ple_window_grow;
+
+       return val;
+}
+
+static int __shrink_ple_window(int val, int modifier, int minimum)
+{
+       if (modifier < 1)
+               return ple_window;
+
+       if (modifier < ple_window)
+               val /= modifier;
+       else
+               val -= modifier;
+
+       return max(val, minimum);
+}
+
+static void grow_ple_window(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int old = vmx->ple_window;
+
+       vmx->ple_window = __grow_ple_window(old);
+
+       if (vmx->ple_window != old)
+               vmx->ple_window_dirty = true;
+
+       trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
+}
+
+static void shrink_ple_window(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int old = vmx->ple_window;
+
+       vmx->ple_window = __shrink_ple_window(old,
+                                             ple_window_shrink, ple_window);
+
+       if (vmx->ple_window != old)
+               vmx->ple_window_dirty = true;
+
+       trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
+}
+
+/*
+ * ple_window_actual_max is computed to be one grow_ple_window() below
+ * ple_window_max. (See __grow_ple_window for the reason.)
+ * This prevents overflows, because ple_window_max is int.
+ * ple_window_max effectively rounded down to a multiple of ple_window_grow in
+ * this process.
+ * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
+ */
+static void update_ple_window_actual_max(void)
+{
+       ple_window_actual_max =
+                       __shrink_ple_window(max(ple_window_max, ple_window),
+                                           ple_window_grow, INT_MIN);
+}
+
 /*
  * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
  * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
  */
 static int handle_pause(struct kvm_vcpu *vcpu)
 {
+       if (ple_gap)
+               grow_ple_window(vcpu);
+
        skip_emulated_instruction(vcpu);
        kvm_vcpu_on_spin(vcpu);
 
@@ -7387,6 +7485,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        if (vmx->emulation_required)
                return;
 
+       if (vmx->ple_window_dirty) {
+               vmx->ple_window_dirty = false;
+               vmcs_write32(PLE_WINDOW, vmx->ple_window);
+       }
+
        if (vmx->nested.sync_shadow_vmcs) {
                copy_vmcs12_to_shadow(vmx);
                vmx->nested.sync_shadow_vmcs = false;
@@ -8846,6 +8949,12 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
        return X86EMUL_CONTINUE;
 }
 
+void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+       if (ple_gap)
+               shrink_ple_window(vcpu);
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
        .cpu_has_kvm_support = cpu_has_kvm_support,
        .disabled_by_bios = vmx_disabled_by_bios,
@@ -8890,7 +8999,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .cache_reg = vmx_cache_reg,
        .get_rflags = vmx_get_rflags,
        .set_rflags = vmx_set_rflags,
-       .fpu_activate = vmx_fpu_activate,
        .fpu_deactivate = vmx_fpu_deactivate,
 
        .tlb_flush = vmx_flush_tlb,
@@ -8951,6 +9059,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .mpx_supported = vmx_mpx_supported,
 
        .check_nested_events = vmx_check_nested_events,
+
+       .sched_in = vmx_sched_in,
 };
 
 static int __init vmx_init(void)
@@ -9065,6 +9175,8 @@ static int __init vmx_init(void)
        } else
                kvm_disable_tdp();
 
+       update_ple_window_actual_max();
+
        return 0;
 
 out7:
@@ -9098,7 +9210,7 @@ static void __exit vmx_exit(void)
        free_page((unsigned long)vmx_vmread_bitmap);
 
 #ifdef CONFIG_KEXEC
-       rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);
+       RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
        synchronize_rcu();
 #endif
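
With the defaults above, each PAUSE exit doubles the vcpu's window (a grow modifier below ple_window multiplies; a larger one adds), clamped so the multiplication can never overflow an int, and the default shrink modifier of 0 resets the window to ple_window on every sched-in. A hedged userspace sketch of that arithmetic, condensed from __grow_ple_window()/__shrink_ple_window():

#include <stdio.h>
#include <limits.h>

#define PLE_WINDOW	4096
#define PLE_GROW	2
#define PLE_ACTUAL_MAX	(INT_MAX / PLE_GROW)	/* one grow below INT_MAX */

static int grow_window(int val)
{
	if (PLE_GROW < 1)
		return PLE_WINDOW;
	if (val > PLE_ACTUAL_MAX)
		val = PLE_ACTUAL_MAX;
	/* a modifier below ple_window multiplies, a larger one adds */
	return PLE_GROW < PLE_WINDOW ? val * PLE_GROW : val + PLE_GROW;
}

int main(void)
{
	int window = PLE_WINDOW;
	int i;

	for (i = 0; i < 4; i++) {
		printf("PAUSE exit %d: ple_window = %d\n", i, window);
		window = grow_window(window);
	}
	/* the default shrink modifier is 0, so sched-in resets the window */
	printf("sched-in:     ple_window = %d\n", PLE_WINDOW);
	return 0;
}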
 
index 8f1e22d3b286ccf03c90dc3bc28f120fe27a244b..c10408ef9ab1e3dda92f8b1ae008cea24c32d317 100644 (file)
@@ -1726,6 +1726,7 @@ static bool valid_mtrr_type(unsigned t)
 static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
        int i;
+       u64 mask = 0;
 
        if (!msr_mtrr_valid(msr))
                return false;
@@ -1747,7 +1748,24 @@ static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
        }
 
        /* variable MTRRs */
-       return valid_mtrr_type(data & 0xff);
+       WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));
+
+       for (i = 63; i > boot_cpu_data.x86_phys_bits; i--)
+               mask |= (1ULL << i);
+       if ((msr & 1) == 0) {
+               /* MTRR base */
+               if (!valid_mtrr_type(data & 0xff))
+                       return false;
+               mask |= 0xf00;
+       } else
+               /* MTRR mask */
+               mask |= 0x7ff;
+       if (data & mask) {
+               kvm_inject_gp(vcpu, 0);
+               return false;
+       }
+
+       return true;
 }
 
 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
@@ -2419,7 +2437,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_K7_HWCR:
        case MSR_VM_HSAVE_PA:
        case MSR_K7_EVNTSEL0:
+       case MSR_K7_EVNTSEL1:
+       case MSR_K7_EVNTSEL2:
+       case MSR_K7_EVNTSEL3:
        case MSR_K7_PERFCTR0:
+       case MSR_K7_PERFCTR1:
+       case MSR_K7_PERFCTR2:
+       case MSR_K7_PERFCTR3:
        case MSR_K8_INT_PENDING_MSG:
        case MSR_AMD64_NB_CFG:
        case MSR_FAM10H_MMIO_CONF_BASE:
@@ -5224,6 +5248,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 
                ctxt->interruptibility = 0;
                ctxt->have_exception = false;
+               ctxt->exception.vector = -1;
                ctxt->perm_ok = false;
 
                ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
@@ -7146,6 +7171,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
                static_key_slow_dec(&kvm_no_apic_vcpu);
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+       kvm_x86_ops->sched_in(vcpu, cpu);
+}
+
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
        if (type)
@@ -7643,3 +7673,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
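
The new variable-MTRR path builds a reserved-bit mask instead of only checking the memory type: bits above the physical address width are always reserved, PHYSBASE MSRs (even numbers from 0x200) additionally reserve bits 11:8, and PHYSMASK MSRs (odd) reserve bits 10:0. This also explains the KVM_NR_VAR_MTRR 10 -> 8 change above: the WARN_ON covers 0x200 + 2 * 8 MSRs, the eight variable ranges actually supported. A hedged sketch of the mask computation for an assumed 40-bit address width:

#include <stdio.h>
#include <stdint.h>

static uint64_t mtrr_reserved_mask(uint32_t msr, int phys_bits)
{
	uint64_t mask = 0;
	int i;

	for (i = 63; i > phys_bits; i--)	/* bits above the address width */
		mask |= 1ULL << i;
	if ((msr & 1) == 0)
		mask |= 0xf00;		/* MTRRphysBase: bits 11:8 reserved */
	else
		mask |= 0x7ff;		/* MTRRphysMask: bits 10:0 reserved */
	return mask;
}

int main(void)
{
	/* 0x200/0x201 are MTRRphysBase0/MTRRphysMask0 */
	printf("base: %#018llx\n", (unsigned long long)mtrr_reserved_mask(0x200, 40));
	printf("mask: %#018llx\n", (unsigned long long)mtrr_reserved_mask(0x201, 40));
	return 0;
}
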
index a4c33b34fe3f0c7f3366a320c6df4534001c24b7..ebd723676633a4adeb39bff3c46189308a7e1ac1 100644 (file)
@@ -624,6 +624,8 @@ void kvm_arch_exit(void);
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
+
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
index bf06577fea51c22ab944edb9560e56f01aae2f94..5819a2708d7edd5823d9e5885a6b7c3796b387ad 100644 (file)
@@ -526,8 +526,10 @@ static int assign_guest_irq(struct kvm *kvm,
                dev->irq_requested_type |= guest_irq_type;
                if (dev->ack_notifier.gsi != -1)
                        kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
-       } else
+       } else {
                kvm_free_irq_source_id(kvm, dev->irq_source_id);
+               dev->irq_source_id = -1;
+       }
 
        return r;
 }
index 0df7d4b34dfec96345d359bf08562309d8607f40..714b949323120aee855dd7e57db549cac31e8183 100644 (file)
@@ -61,6 +61,14 @@ static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
        return pfn;
 }
 
+static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
+{
+       unsigned long i;
+
+       for (i = 0; i < npages; ++i)
+               kvm_release_pfn_clean(pfn + i);
+}
+
 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
        gfn_t gfn, end_gfn;
@@ -123,6 +131,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
                if (r) {
                        printk(KERN_ERR "kvm_iommu_map_address:"
                               "iommu failed to map pfn=%llx\n", pfn);
+                       kvm_unpin_pages(kvm, pfn, page_size);
                        goto unmap_pages;
                }
 
@@ -134,7 +143,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
        return 0;
 
 unmap_pages:
-       kvm_iommu_put_pages(kvm, slot->base_gfn, gfn);
+       kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn);
        return r;
 }
 
@@ -266,14 +275,6 @@ out_unlock:
        return r;
 }
 
-static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
-{
-       unsigned long i;
-
-       for (i = 0; i < npages; ++i)
-               kvm_release_pfn_clean(pfn + i);
-}
-
 static void kvm_iommu_put_pages(struct kvm *kvm,
                                gfn_t base_gfn, unsigned long npages)
 {
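
The unmap_pages fix matters because kvm_iommu_put_pages() takes a page count as its last argument, not an end gfn; passing the absolute gfn made the error path unmap the wrong range. A toy illustration with assumed values:

#include <stdio.h>

typedef unsigned long long gfn_t;

int main(void)
{
	gfn_t base_gfn = 0x100000;	/* assumed slot start */
	gfn_t gfn = 0x100200;		/* assumed gfn where mapping failed */

	printf("old npages argument: %llu (absolute gfn, wrong)\n", gfn);
	printf("new npages argument: %llu (pages actually mapped)\n",
	       gfn - base_gfn);
	return 0;
}
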
index 33712fb26eb11caa0f14bbf3b21a9e459c18252a..5a0817ee996ec058e1164dd77fb7404c1d6740a1 100644 (file)
@@ -129,7 +129,8 @@ int vcpu_load(struct kvm_vcpu *vcpu)
                struct pid *oldpid = vcpu->pid;
                struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
                rcu_assign_pointer(vcpu->pid, newpid);
-               synchronize_rcu();
+               if (oldpid)
+                       synchronize_rcu();
                put_pid(oldpid);
        }
        cpu = get_cpu();
@@ -3123,6 +3124,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
        if (vcpu->preempted)
                vcpu->preempted = false;
 
+       kvm_arch_sched_in(vcpu, cpu);
+
        kvm_arch_vcpu_load(vcpu, cpu);
 }