]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge branch 'kvm-ppc-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus...
authorPaolo Bonzini <pbonzini@redhat.com>
Wed, 26 Jul 2017 13:19:01 +0000 (15:19 +0200)
committerPaolo Bonzini <pbonzini@redhat.com>
Wed, 26 Jul 2017 17:04:56 +0000 (19:04 +0200)
Two commits which fix host crashes.

Signed-off-by: Paolo BOnzini <pbonzini@redhat.com>
14 files changed:
arch/s390/kvm/kvm-s390.c
arch/x86/entry/entry_64.S
arch/x86/include/asm/entry_arch.h
arch/x86/include/asm/hardirq.h
arch/x86/include/asm/hw_irq.h
arch/x86/include/asm/irq_vectors.h
arch/x86/kernel/irq.c
arch/x86/kernel/irqinit.c
arch/x86/kvm/lapic.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
include/linux/kvm_host.h
tools/kvm/kvm_stat/kvm_stat
virt/kvm/kvm_main.c

index 3f2884e99ed4ce461cdb6f08148968880e90747b..af09d3437631d348dca2f1a6699c34ed49c624ed 100644 (file)
@@ -1324,7 +1324,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 {
        uint8_t *keys;
        uint64_t hva;
-       int i, r = 0;
+       int srcu_idx, i, r = 0;
 
        if (args->flags != 0)
                return -EINVAL;
@@ -1342,6 +1342,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
                return -ENOMEM;
 
        down_read(&current->mm->mmap_sem);
+       srcu_idx = srcu_read_lock(&kvm->srcu);
        for (i = 0; i < args->count; i++) {
                hva = gfn_to_hva(kvm, args->start_gfn + i);
                if (kvm_is_error_hva(hva)) {
@@ -1353,6 +1354,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
                if (r)
                        break;
        }
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
        up_read(&current->mm->mmap_sem);
 
        if (!r) {
@@ -1370,7 +1372,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 {
        uint8_t *keys;
        uint64_t hva;
-       int i, r = 0;
+       int srcu_idx, i, r = 0;
 
        if (args->flags != 0)
                return -EINVAL;
@@ -1396,6 +1398,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
                goto out;
 
        down_read(&current->mm->mmap_sem);
+       srcu_idx = srcu_read_lock(&kvm->srcu);
        for (i = 0; i < args->count; i++) {
                hva = gfn_to_hva(kvm, args->start_gfn + i);
                if (kvm_is_error_hva(hva)) {
@@ -1413,6 +1416,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
                if (r)
                        break;
        }
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
        up_read(&current->mm->mmap_sem);
 out:
        kvfree(keys);
index a9a8027a6c0eaa748541282448f41b9eec834d6c..d271fb79248f3569c6a0a934f707d91f398562b8 100644 (file)
@@ -705,6 +705,7 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR               x86_platform_ipi                smp_x86_platform_ipi
 #ifdef CONFIG_HAVE_KVM
 apicinterrupt3 POSTED_INTR_VECTOR              kvm_posted_intr_ipi             smp_kvm_posted_intr_ipi
 apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR       kvm_posted_intr_wakeup_ipi      smp_kvm_posted_intr_wakeup_ipi
+apicinterrupt3 POSTED_INTR_NESTED_VECTOR       kvm_posted_intr_nested_ipi      smp_kvm_posted_intr_nested_ipi
 #endif
 
 #ifdef CONFIG_X86_MCE_THRESHOLD
index df002992d8fd3dffa9d9d0913e823ae52f9e2256..07b06955a05df90575d2d66ffec0e0b44793a587 100644 (file)
@@ -25,6 +25,8 @@ BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR,
                 smp_kvm_posted_intr_ipi)
 BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR,
                 smp_kvm_posted_intr_wakeup_ipi)
+BUILD_INTERRUPT3(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR,
+                smp_kvm_posted_intr_nested_ipi)
 #endif
 
 /*
index 9b76cd331990159dcdd042e10f90946f7440adba..ad1ed531febcbc3a15992be0e367f1cb4d43978e 100644 (file)
@@ -15,6 +15,7 @@ typedef struct {
 #ifdef CONFIG_HAVE_KVM
        unsigned int kvm_posted_intr_ipis;
        unsigned int kvm_posted_intr_wakeup_ipis;
+       unsigned int kvm_posted_intr_nested_ipis;
 #endif
        unsigned int x86_platform_ipis; /* arch dependent */
        unsigned int apic_perf_irqs;
index b90e1053049bdd17ad36989315db8ac1b3ccc973..d6dbafbd420737a5ea5363c700940acb22a8c358 100644 (file)
@@ -30,6 +30,7 @@ extern asmlinkage void apic_timer_interrupt(void);
 extern asmlinkage void x86_platform_ipi(void);
 extern asmlinkage void kvm_posted_intr_ipi(void);
 extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
+extern asmlinkage void kvm_posted_intr_nested_ipi(void);
 extern asmlinkage void error_interrupt(void);
 extern asmlinkage void irq_work_interrupt(void);
 
@@ -62,6 +63,7 @@ extern void trace_call_function_single_interrupt(void);
 #define trace_reboot_interrupt  reboot_interrupt
 #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
 #define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi
+#define trace_kvm_posted_intr_nested_ipi kvm_posted_intr_nested_ipi
 #endif /* CONFIG_TRACING */
 
 #ifdef CONFIG_X86_LOCAL_APIC
index 6ca9fd6234e13b83de1bd495d0c4f7231ba439d4..aaf8d28b5d008969786ee60522d0d2f2a2fae126 100644 (file)
@@ -83,7 +83,6 @@
  */
 #define X86_PLATFORM_IPI_VECTOR                0xf7
 
-#define POSTED_INTR_WAKEUP_VECTOR      0xf1
 /*
  * IRQ work vector:
  */
@@ -98,6 +97,8 @@
 /* Vector for KVM to deliver posted interrupt IPI */
 #ifdef CONFIG_HAVE_KVM
 #define POSTED_INTR_VECTOR             0xf2
+#define POSTED_INTR_WAKEUP_VECTOR      0xf1
+#define POSTED_INTR_NESTED_VECTOR      0xf0
 #endif
 
 /*
index 4aa03c5a14c905e9c1a1db283d61ac56372041c9..4ed0aba8dbc83b6ef83aed558ae4c459009d9146 100644 (file)
@@ -155,6 +155,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)
                seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis);
        seq_puts(p, "  Posted-interrupt notification event\n");
 
+       seq_printf(p, "%*s: ", prec, "NPI");
+       for_each_online_cpu(j)
+               seq_printf(p, "%10u ",
+                          irq_stats(j)->kvm_posted_intr_nested_ipis);
+       seq_puts(p, "  Nested posted-interrupt event\n");
+
        seq_printf(p, "%*s: ", prec, "PIW");
        for_each_online_cpu(j)
                seq_printf(p, "%10u ",
@@ -313,6 +319,19 @@ __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
        exiting_irq();
        set_irq_regs(old_regs);
 }
+
+/*
+ * Handler for POSTED_INTERRUPT_NESTED_VECTOR.
+ */
+__visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs)
+{
+       struct pt_regs *old_regs = set_irq_regs(regs);
+
+       entering_ack_irq();
+       inc_irq_stat(kvm_posted_intr_nested_ipis);
+       exiting_irq();
+       set_irq_regs(old_regs);
+}
 #endif
 
 __visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs)
index 7468c69875477e876dafd5209f0cd83c6939bce9..c7fd18526c3e3087dee25af3ab0c3f3ff7dc616b 100644 (file)
@@ -150,6 +150,8 @@ static void __init apic_intr_init(void)
        alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
        /* IPI for KVM to deliver interrupt to wake up tasks */
        alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi);
+       /* IPI for KVM to deliver nested posted interrupt */
+       alloc_intr_gate(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi);
 #endif
 
        /* IPI vectors for APIC spurious and error interrupts */
index 2819d4c123eb89c0e00e9785239f19e2b79d6f73..589dcc117086ffdfda31920cfdddfc462bd468c9 100644 (file)
@@ -1495,11 +1495,10 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
 
 static void cancel_hv_timer(struct kvm_lapic *apic)
 {
+       WARN_ON(preemptible());
        WARN_ON(!apic->lapic_timer.hv_timer_in_use);
-       preempt_disable();
        kvm_x86_ops->cancel_hv_timer(apic->vcpu);
        apic->lapic_timer.hv_timer_in_use = false;
-       preempt_enable();
 }
 
 static bool start_hv_timer(struct kvm_lapic *apic)
@@ -1507,6 +1506,7 @@ static bool start_hv_timer(struct kvm_lapic *apic)
        struct kvm_timer *ktimer = &apic->lapic_timer;
        int r;
 
+       WARN_ON(preemptible());
        if (!kvm_x86_ops->set_hv_timer)
                return false;
 
@@ -1538,6 +1538,8 @@ static bool start_hv_timer(struct kvm_lapic *apic)
 static void start_sw_timer(struct kvm_lapic *apic)
 {
        struct kvm_timer *ktimer = &apic->lapic_timer;
+
+       WARN_ON(preemptible());
        if (apic->lapic_timer.hv_timer_in_use)
                cancel_hv_timer(apic);
        if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
@@ -1552,15 +1554,20 @@ static void start_sw_timer(struct kvm_lapic *apic)
 
 static void restart_apic_timer(struct kvm_lapic *apic)
 {
+       preempt_disable();
        if (!start_hv_timer(apic))
                start_sw_timer(apic);
+       preempt_enable();
 }
 
 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
 
-       WARN_ON(!apic->lapic_timer.hv_timer_in_use);
+       preempt_disable();
+       /* If the preempt notifier has already run, it also called apic_timer_expired */
+       if (!apic->lapic_timer.hv_timer_in_use)
+               goto out;
        WARN_ON(swait_active(&vcpu->wq));
        cancel_hv_timer(apic);
        apic_timer_expired(apic);
@@ -1569,6 +1576,8 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
                advance_periodic_target_expiration(apic);
                restart_apic_timer(apic);
        }
+out:
+       preempt_enable();
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
 
@@ -1582,9 +1591,11 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
 
+       preempt_disable();
        /* Possibly the TSC deadline timer is not enabled yet */
        if (apic->lapic_timer.hv_timer_in_use)
                start_sw_timer(apic);
+       preempt_enable();
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
 
index 29fd8af5c347a6c642e131458fcfa254b6d7d854..39a6222bf9687fcc47dea3ee564eaa92704026e9 100644 (file)
@@ -563,7 +563,6 @@ struct vcpu_vmx {
        struct kvm_vcpu       vcpu;
        unsigned long         host_rsp;
        u8                    fail;
-       bool                  nmi_known_unmasked;
        u32                   exit_intr_info;
        u32                   idt_vectoring_info;
        ulong                 rflags;
@@ -4988,9 +4987,12 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
        }
 }
 
-static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
+static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
+                                                    bool nested)
 {
 #ifdef CONFIG_SMP
+       int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
+
        if (vcpu->mode == IN_GUEST_MODE) {
                struct vcpu_vmx *vmx = to_vmx(vcpu);
 
@@ -5008,8 +5010,7 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
                 */
                WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
 
-               apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
-                               POSTED_INTR_VECTOR);
+               apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
                return true;
        }
 #endif
@@ -5024,7 +5025,7 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
        if (is_guest_mode(vcpu) &&
            vector == vmx->nested.posted_intr_nv) {
                /* the PIR and ON have been set by L1. */
-               kvm_vcpu_trigger_posted_interrupt(vcpu);
+               kvm_vcpu_trigger_posted_interrupt(vcpu, true);
                /*
                 * If a posted intr is not recognized by hardware,
                 * we will accomplish it in the next vmentry.
@@ -5058,7 +5059,7 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
        if (pi_test_and_set_on(&vmx->pi_desc))
                return;
 
-       if (!kvm_vcpu_trigger_posted_interrupt(vcpu))
+       if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
                kvm_vcpu_kick(vcpu);
 }
 
@@ -10041,6 +10042,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
                             vmcs12->vm_entry_instruction_len);
                vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
                             vmcs12->guest_interruptibility_info);
+               vmx->loaded_vmcs->nmi_known_unmasked =
+                       !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
        } else {
                vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
        }
@@ -10065,13 +10068,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 
        /* Posted interrupts setting is only taken from vmcs12.  */
        if (nested_cpu_has_posted_intr(vmcs12)) {
-               /*
-                * Note that we use L0's vector here and in
-                * vmx_deliver_nested_posted_interrupt.
-                */
                vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
                vmx->nested.pi_pending = false;
-               vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
+               vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
        } else {
                exec_control &= ~PIN_BASED_POSTED_INTR;
        }
@@ -10942,7 +10941,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
                 */
                vmx_flush_tlb(vcpu);
        }
-
+       /* Restore posted intr vector. */
+       if (nested_cpu_has_posted_intr(vmcs12))
+               vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
 
        vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
        vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
index 82a63c59f77b52fb7a9fa6ce49513c77ce7c21fa..6c97c82814c45a2412da6e28b4393606da450bb3 100644 (file)
@@ -597,8 +597,8 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
                      (unsigned long *)&vcpu->arch.regs_avail))
                return true;
 
-       gfn = (kvm_read_cr3(vcpu) & ~31ul) >> PAGE_SHIFT;
-       offset = (kvm_read_cr3(vcpu) & ~31ul) & (PAGE_SIZE - 1);
+       gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT;
+       offset = (kvm_read_cr3(vcpu) & 0xffffffe0ul) & (PAGE_SIZE - 1);
        r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
                                       PFERR_USER_MASK | PFERR_WRITE_MASK);
        if (r < 0)
index 648b34cabb38214e6bb957aeecbcf61e03a26d4c..890b706d194348c99517ddf23266de860a395f2e 100644 (file)
@@ -445,6 +445,7 @@ struct kvm {
        struct kvm_stat_data **debugfs_stat_data;
        struct srcu_struct srcu;
        struct srcu_struct irq_srcu;
+       pid_t userspace_pid;
 };
 
 #define kvm_err(fmt, ...) \
index dd8f00cfb8b482b71b60529fac0618363dc8e540..32283d88701afa33a0e53d730c8e4e3bd7ec2861 100755 (executable)
@@ -474,7 +474,7 @@ class Provider(object):
     @staticmethod
     def is_field_wanted(fields_filter, field):
         """Indicate whether field is valid according to fields_filter."""
-        if not fields_filter:
+        if not fields_filter or fields_filter == "help":
             return True
         return re.match(fields_filter, field) is not None
 
@@ -1413,8 +1413,8 @@ performance.
 
 Requirements:
 - Access to:
-    /sys/kernel/debug/kvm
-    /sys/kernel/debug/trace/events/*
+    %s
+    %s/events/*
     /proc/pid/task
 - /proc/sys/kernel/perf_event_paranoid < 1 if user has no
   CAP_SYS_ADMIN and perf events are used.
@@ -1434,7 +1434,7 @@ Interactive Commands:
    s     set update interval
    x     toggle reporting of stats for individual child trace events
 Press any other key to refresh statistics immediately.
-"""
+""" % (PATH_DEBUGFS_KVM, PATH_DEBUGFS_TRACING)
 
     class PlainHelpFormatter(optparse.IndentedHelpFormatter):
         def format_description(self, description):
@@ -1496,7 +1496,8 @@ Press any other key to refresh statistics immediately.
                          action='store',
                          default=DEFAULT_REGEX,
                          dest='fields',
-                         help='fields to display (regex)',
+                         help='''fields to display (regex)
+                                 "-f help" for a list of available events''',
                          )
     optparser.add_option('-p', '--pid',
                          action='store',
@@ -1559,6 +1560,17 @@ def main():
 
     stats = Stats(options)
 
+    if options.fields == "help":
+        event_list = "\n"
+        s = stats.get()
+        for key in s.keys():
+            if key.find('(') != -1:
+                key = key[0:key.find('(')]
+            if event_list.find('\n' + key + '\n') == -1:
+                event_list += key + '\n'
+        sys.stdout.write(event_list)
+        return ""
+
     if options.log:
         log(stats)
     elif not options.once:
index 82987d457b8bb4e7ec4c1159e37857fd85a11c18..f3f74271f1a9f11d2670f8377affc2c125d5181f 100644 (file)
@@ -3883,7 +3883,6 @@ static const struct file_operations *stat_fops[] = {
 static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
 {
        struct kobj_uevent_env *env;
-       char *tmp, *pathbuf = NULL;
        unsigned long long created, active;
 
        if (!kvm_dev.this_device || !kvm)
@@ -3907,38 +3906,28 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
        add_uevent_var(env, "CREATED=%llu", created);
        add_uevent_var(env, "COUNT=%llu", active);
 
-       if (type == KVM_EVENT_CREATE_VM)
+       if (type == KVM_EVENT_CREATE_VM) {
                add_uevent_var(env, "EVENT=create");
-       else if (type == KVM_EVENT_DESTROY_VM)
+               kvm->userspace_pid = task_pid_nr(current);
+       } else if (type == KVM_EVENT_DESTROY_VM) {
                add_uevent_var(env, "EVENT=destroy");
+       }
+       add_uevent_var(env, "PID=%d", kvm->userspace_pid);
 
        if (kvm->debugfs_dentry) {
-               char p[ITOA_MAX_LEN];
-
-               snprintf(p, sizeof(p), "%s", kvm->debugfs_dentry->d_name.name);
-               tmp = strchrnul(p + 1, '-');
-               *tmp = '\0';
-               add_uevent_var(env, "PID=%s", p);
-               pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
-               if (pathbuf) {
-                       /* sizeof counts the final '\0' */
-                       int len = sizeof("STATS_PATH=") - 1;
-                       const char *pvar = "STATS_PATH=";
-
-                       tmp = dentry_path_raw(kvm->debugfs_dentry,
-                                             pathbuf + len,
-                                             PATH_MAX - len);
-                       if (!IS_ERR(tmp)) {
-                               memcpy(tmp - len, pvar, len);
-                               env->envp[env->envp_idx++] = tmp - len;
-                       }
+               char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL);
+
+               if (p) {
+                       tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX);
+                       if (!IS_ERR(tmp))
+                               add_uevent_var(env, "STATS_PATH=%s", tmp);
+                       kfree(p);
                }
        }
        /* no need for checks, since we are adding at most only 5 keys */
        env->envp[env->envp_idx++] = NULL;
        kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp);
        kfree(env);
-       kfree(pathbuf);
 }
 
 static int kvm_init_debug(void)