Merge remote-tracking branch 'kvm/linux-next'
author    Thierry Reding <treding@nvidia.com>
          Thu, 24 Oct 2013 12:58:35 +0000 (14:58 +0200)
committer Thierry Reding <treding@nvidia.com>
          Thu, 24 Oct 2013 12:58:35 +0000 (14:58 +0200)
Conflicts:
arch/arm/kvm/reset.c

42 files changed:
Documentation/virtual/kvm/api.txt
Documentation/virtual/kvm/locking.txt
arch/arm/include/asm/kvm_arm.h
arch/arm/include/asm/kvm_asm.h
arch/arm/include/asm/kvm_host.h
arch/arm/include/uapi/asm/kvm.h
arch/arm/kvm/Makefile
arch/arm/kvm/arm.c
arch/arm/kvm/coproc.c
arch/arm/kvm/coproc_a15.c
arch/arm/kvm/coproc_a7.c [new file with mode: 0644]
arch/arm/kvm/emulate.c
arch/arm/kvm/guest.c
arch/arm/kvm/reset.c
arch/arm64/include/asm/kvm_host.h
arch/arm64/kvm/guest.c
arch/ia64/include/asm/kvm_host.h
arch/mips/include/asm/kvm_host.h
arch/powerpc/include/asm/kvm_host.h
arch/s390/include/asm/kvm_host.h
arch/s390/kvm/diag.c
arch/s390/kvm/gaccess.h
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/priv.c
arch/x86/include/asm/kvm_host.h
arch/x86/include/uapi/asm/msr-index.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/mmu.c
arch/x86/kvm/mmu.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
include/linux/kvm_host.h
include/trace/events/kvm.h
include/uapi/linux/kvm.h
virt/kvm/async_pf.c
virt/kvm/iommu.c
virt/kvm/kvm_main.c

index 858aecf21db2c9d449d984af406a838ed5143ed4..a89a5ee0b9405db4ab99a5e71f678f22ce1886e3 100644 (file)
@@ -2304,7 +2304,31 @@ Possible features:
          Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
 
 
-4.83 KVM_GET_REG_LIST
+4.83 KVM_ARM_PREFERRED_TARGET
+
+Capability: basic
+Architectures: arm, arm64
+Type: vm ioctl
+Parameters: struct kvm_vcpu_init (out)
+Returns: 0 on success; -1 on error
+Errors:
+  ENODEV:    no preferred target available for the host
+
+This queries KVM for the preferred CPU target type which can be emulated
+by KVM on the underlying host.
+
+The ioctl returns a struct kvm_vcpu_init instance containing information
+about the preferred CPU target type and recommended features for it.  The
+kvm_vcpu_init->features bitmap returned will have feature bits set if
+the preferred target recommends setting these features, but this is
+not mandatory.
+
+The information returned by this ioctl can be used to prepare an instance
+of struct kvm_vcpu_init for the KVM_ARM_VCPU_INIT ioctl, which will
+result in a VCPU matching the underlying host.
+
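For illustration, a minimal userspace sketch of the intended flow, assuming
vm_fd and vcpu_fd come from the usual KVM_CREATE_VM and KVM_CREATE_VCPU
calls (error handling trimmed to err()):

    #include <err.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static void init_vcpu(int vm_fd, int vcpu_fd)
    {
            struct kvm_vcpu_init init;

            /* Ask the VM which target the host can emulate best. */
            if (ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init) < 0)
                    err(1, "KVM_ARM_PREFERRED_TARGET");  /* e.g. ENODEV */

            /* The returned feature bits are a recommendation only. */
            if (ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init) < 0)
                    err(1, "KVM_ARM_VCPU_INIT");
    }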
+
+4.84 KVM_GET_REG_LIST
 
 Capability: basic
 Architectures: arm, arm64
@@ -2323,8 +2347,7 @@ struct kvm_reg_list {
 This ioctl returns the guest registers that are supported for the
 KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
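For illustration, the usual probe-then-allocate pattern from userspace,
assuming vcpu_fd is an open VCPU file descriptor: an undersized call fails
with E2BIG and reports the required count in n.

    #include <err.h>
    #include <errno.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static struct kvm_reg_list *get_reg_list(int vcpu_fd)
    {
            struct kvm_reg_list probe = { .n = 0 }, *list;

            /* Probe: fails with E2BIG and fills in the register count. */
            if (ioctl(vcpu_fd, KVM_GET_REG_LIST, &probe) == 0 ||
                errno != E2BIG)
                    err(1, "KVM_GET_REG_LIST (probe)");

            list = malloc(sizeof(*list) + probe.n * sizeof(__u64));
            if (!list)
                    err(1, "malloc");
            list->n = probe.n;
            if (ioctl(vcpu_fd, KVM_GET_REG_LIST, list) < 0)
                    err(1, "KVM_GET_REG_LIST");
            return list;
    }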
 
-
-4.84 KVM_ARM_SET_DEVICE_ADDR
+4.85 KVM_ARM_SET_DEVICE_ADDR
 
 Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
 Architectures: arm, arm64
@@ -2362,7 +2385,7 @@ must be called after calling KVM_CREATE_IRQCHIP, but before calling
 KVM_RUN on any of the VCPUs.  Calling this ioctl twice for any of the
 base addresses will return -EEXIST.
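For illustration, a sketch of the required ordering using the VGIC v2
constants from the uapi header; the distributor base address here is an
arbitrary example value, and vm_fd is an assumed open VM descriptor:

    struct kvm_arm_device_addr dist_addr = {
            .id   = ((__u64)KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT) |
                    KVM_VGIC_V2_ADDR_TYPE_DIST,
            .addr = 0x2c001000,             /* example guest-physical base */
    };

    /* Must follow KVM_CREATE_IRQCHIP and precede any KVM_RUN. */
    if (ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0) < 0)
            err(1, "KVM_CREATE_IRQCHIP");
    if (ioctl(vm_fd, KVM_ARM_SET_DEVICE_ADDR, &dist_addr) < 0)
            err(1, "KVM_ARM_SET_DEVICE_ADDR");  /* EEXIST if set twice */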
 
-4.85 KVM_PPC_RTAS_DEFINE_TOKEN
+4.86 KVM_PPC_RTAS_DEFINE_TOKEN
 
 Capability: KVM_CAP_PPC_RTAS
 Architectures: ppc
index 41b7ac9884b5ebdeaba602077bebb11efebcc648..f8869410d40ce8ba4eac30d3193f28aedb8383a6 100644 (file)
@@ -132,10 +132,14 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update().
 ------------
 
 Name:          kvm_lock
-Type:          raw_spinlock
+Type:          spinlock_t
 Arch:          any
 Protects:      - vm_list
-               - hardware virtualization enable/disable
+
+Name:          kvm_count_lock
+Type:          raw_spinlock_t
+Arch:          any
+Protects:      - hardware virtualization enable/disable
 Comment:       'raw' because hardware enabling/disabling must be atomic w.r.t.
                migration.
 
@@ -151,3 +155,14 @@ Type:              spinlock_t
 Arch:          any
 Protects:      - shadow page/shadow tlb entry
 Comment:       it is a spinlock since it is used in the mmu notifier.
+
+Name:          kvm->srcu
+Type:          srcu lock
+Arch:          any
+Protects:      - kvm->memslots
+               - kvm->buses
+Comment:       The srcu read lock must be held while accessing memslots (e.g.
+               when using gfn_to_* functions) and while accessing in-kernel
+               MMIO/PIO address->device structure mapping (kvm->buses).
+               The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
+               if it is needed by multiple functions.
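For illustration, a hypothetical helper showing the pattern described above,
built only on the existing srcu_read_lock()/srcu_read_unlock() and
gfn_to_hva() APIs:

    static unsigned long lookup_hva(struct kvm *kvm, gfn_t gfn)
    {
            unsigned long hva;
            int idx;

            idx = srcu_read_lock(&kvm->srcu);
            hva = gfn_to_hva(kvm, gfn);     /* dereferences kvm->memslots */
            srcu_read_unlock(&kvm->srcu, idx);

            return hva;
    }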
index 64e96960de297c850c480f1df3be1dc331ee891a..d556f03bca17491eeecf26a976641389cb49feb2 100644 (file)
 #define TTBCR_IRGN1    (3 << 24)
 #define TTBCR_EPD1     (1 << 23)
 #define TTBCR_A1       (1 << 22)
-#define TTBCR_T1SZ     (3 << 16)
+#define TTBCR_T1SZ     (7 << 16)
 #define TTBCR_SH0      (3 << 12)
 #define TTBCR_ORGN0    (3 << 10)
 #define TTBCR_IRGN0    (3 << 8)
 #define TTBCR_EPD0     (1 << 7)
-#define TTBCR_T0SZ     3
+#define TTBCR_T0SZ     (7 << 0)
 #define HTCR_MASK      (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
 
 /* Hyp System Trap Register */
index a2f43ddcc3004aeaa342666c4973277f076390cd..661da11f76f4a5eca006850396c7760034fab48f 100644 (file)
@@ -39,7 +39,7 @@
 #define c6_IFAR                17      /* Instruction Fault Address Register */
 #define c7_PAR         18      /* Physical Address Register */
 #define c7_PAR_high    19      /* PAR top 32 bits */
-#define c9_L2CTLR      20      /* Cortex A15 L2 Control Register */
+#define c9_L2CTLR      20      /* Cortex A15/A7 L2 Control Register */
 #define c10_PRRR       21      /* Primary Region Remap Register */
 #define c10_NMRR       22      /* Normal Memory Remap Register */
 #define c12_VBAR       23      /* Vector Base Address Register */
index 7d22517d8071192e467a6653a177ab08742d2650..8a6f6db14ee412ae06dc18bd4027d9652321fc0b 100644 (file)
 
 #define KVM_VCPU_MAX_FEATURES 1
 
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-#define KVM_NR_PAGE_SIZES      1
-#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
-
 #include <kvm/arm_vgic.h>
 
 struct kvm_vcpu;
@@ -154,6 +149,7 @@ struct kvm_vcpu_stat {
 struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
                        const struct kvm_vcpu_init *init);
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 struct kvm_one_reg;
index c1ee007523d78dd25b1dd21661af605da4aa7ef3..c498b60c0505c35ed1da467923795975075e7455 100644 (file)
@@ -63,7 +63,8 @@ struct kvm_regs {
 
 /* Supported Processor Types */
 #define KVM_ARM_TARGET_CORTEX_A15      0
-#define KVM_ARM_NUM_TARGETS            1
+#define KVM_ARM_TARGET_CORTEX_A7       1
+#define KVM_ARM_NUM_TARGETS            2
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT      0
index d99bee4950e50a6c26e428acc710448fba1a726f..789bca9e64a7fdeabe091df30db44daf8b4b1b40 100644 (file)
@@ -19,6 +19,6 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
+obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
 obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
index 9c697db2787e2a524cf59db4519f5272fa2918b0..cc5adb9349efe1b044ed834f6233a58e606e3aa0 100644 (file)
@@ -797,6 +797,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        return -EFAULT;
                return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
        }
+       case KVM_ARM_PREFERRED_TARGET: {
+               int err;
+               struct kvm_vcpu_init init;
+
+               err = kvm_vcpu_preferred_target(&init);
+               if (err)
+                       return err;
+
+               if (copy_to_user(argp, &init, sizeof(init)))
+                       return -EFAULT;
+
+               return 0;
+       }
        default:
                return -EINVAL;
        }
index db9cf692d4dded3e2a6cc7e5622ba90ee5bef2e8..a629f2c1d0f968e7a39e9d8ab98e0e1bee32d492 100644 (file)
@@ -71,6 +71,92 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
        return 1;
 }
 
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+       /*
+        * Compute guest MPIDR. No need to mess around with different clusters
+        * but we read the 'U' bit from the underlying hardware directly.
+        */
+       vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK)
+                                       | vcpu->vcpu_id;
+}
+
+/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */
+static bool access_actlr(struct kvm_vcpu *vcpu,
+                        const struct coproc_params *p,
+                        const struct coproc_reg *r)
+{
+       if (p->is_write)
+               return ignore_write(vcpu, p);
+
+       *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
+       return true;
+}
+
+/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */
+static bool access_cbar(struct kvm_vcpu *vcpu,
+                       const struct coproc_params *p,
+                       const struct coproc_reg *r)
+{
+       if (p->is_write)
+               return write_to_read_only(vcpu, p);
+       return read_zero(vcpu, p);
+}
+
+/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */
+static bool access_l2ctlr(struct kvm_vcpu *vcpu,
+                         const struct coproc_params *p,
+                         const struct coproc_reg *r)
+{
+       if (p->is_write)
+               return ignore_write(vcpu, p);
+
+       *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
+       return true;
+}
+
+static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+       u32 l2ctlr, ncores;
+
+       asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
+       l2ctlr &= ~(3 << 24);
+       ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
+       l2ctlr |= (ncores & 3) << 24;
+
+       vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+       u32 actlr;
+
+       /* ACTLR contains SMP bit: make sure you create all cpus first! */
+       asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
+       /* Make the SMP bit consistent with the guest configuration */
+       if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
+               actlr |= 1U << 6;
+       else
+               actlr &= ~(1U << 6);
+
+       vcpu->arch.cp15[c1_ACTLR] = actlr;
+}
+
+/*
+ * TRM entries: A7:4.3.50, A15:4.3.49
+ * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored).
+ */
+static bool access_l2ectlr(struct kvm_vcpu *vcpu,
+                          const struct coproc_params *p,
+                          const struct coproc_reg *r)
+{
+       if (p->is_write)
+               return ignore_write(vcpu, p);
+
+       *vcpu_reg(vcpu, p->Rt1) = 0;
+       return true;
+}
+
 /* See note at ARM ARM B1.14.4 */
 static bool access_dcsw(struct kvm_vcpu *vcpu,
                        const struct coproc_params *p,
@@ -153,10 +239,22 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
  *            registers preceding 32-bit ones.
  */
 static const struct coproc_reg cp15_regs[] = {
+       /* MPIDR: we use VMPIDR for guest access. */
+       { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
+                       NULL, reset_mpidr, c0_MPIDR },
+
        /* CSSELR: swapped by interrupt.S. */
        { CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32,
                        NULL, reset_unknown, c0_CSSELR },
 
+       /* ACTLR: trapped by HCR.TAC bit. */
+       { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
+                       access_actlr, reset_actlr, c1_ACTLR },
+
+       /* CPACR: swapped by interrupt.S. */
+       { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
+                       NULL, reset_val, c1_CPACR, 0x00000000 },
+
        /* TTBR0/TTBR1: swapped by interrupt.S. */
        { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
        { CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
@@ -194,6 +292,13 @@ static const struct coproc_reg cp15_regs[] = {
        { CRn( 7), CRm( 6), Op1( 0), Op2( 2), is32, access_dcsw},
        { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw},
        { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw},
+       /*
+        * L2CTLR access (guest wants to know #CPUs).
+        */
+       { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
+                       access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
+       { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
+
        /*
         * Dummy performance monitor implementation.
         */
@@ -234,6 +339,9 @@ static const struct coproc_reg cp15_regs[] = {
        /* CNTKCTL: swapped by interrupt.S. */
        { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
                        NULL, reset_val, c14_CNTKCTL, 0x00000000 },
+
+       /* The Configuration Base Address Register. */
+       { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
 };
 
 /* Target specific emulation tables */
@@ -241,6 +349,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
 
 void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
 {
+       unsigned int i;
+
+       for (i = 1; i < table->num; i++)
+               BUG_ON(cmp_reg(&table->table[i-1],
+                              &table->table[i]) >= 0);
+
        target_tables[table->target] = table;
 }
 
index cf93472b9dd60daf3da620cf3a44a9ff65a6eac6..bb0cac1410ccf3f2bf6c6e4ff8436e8750036357 100644 (file)
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 #include <linux/kvm_host.h>
-#include <asm/cputype.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_host.h>
-#include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
 #include <linux/init.h>
 
-static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
-       /*
-        * Compute guest MPIDR:
-        * (Even if we present only one VCPU to the guest on an SMP
-        * host we don't set the U bit in the MPIDR, or vice versa, as
-        * revealing the underlying hardware properties is likely to
-        * be the best choice).
-        */
-       vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK)
-               | (vcpu->vcpu_id & MPIDR_LEVEL_MASK);
-}
-
 #include "coproc.h"
 
-/* A15 TRM 4.3.28: RO WI */
-static bool access_actlr(struct kvm_vcpu *vcpu,
-                        const struct coproc_params *p,
-                        const struct coproc_reg *r)
-{
-       if (p->is_write)
-               return ignore_write(vcpu, p);
-
-       *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
-       return true;
-}
-
-/* A15 TRM 4.3.60: R/O. */
-static bool access_cbar(struct kvm_vcpu *vcpu,
-                       const struct coproc_params *p,
-                       const struct coproc_reg *r)
-{
-       if (p->is_write)
-               return write_to_read_only(vcpu, p);
-       return read_zero(vcpu, p);
-}
-
-/* A15 TRM 4.3.48: R/O WI. */
-static bool access_l2ctlr(struct kvm_vcpu *vcpu,
-                         const struct coproc_params *p,
-                         const struct coproc_reg *r)
-{
-       if (p->is_write)
-               return ignore_write(vcpu, p);
-
-       *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
-       return true;
-}
-
-static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
-       u32 l2ctlr, ncores;
-
-       asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
-       l2ctlr &= ~(3 << 24);
-       ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
-       l2ctlr |= (ncores & 3) << 24;
-
-       vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
-}
-
-static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
-       u32 actlr;
-
-       /* ACTLR contains SMP bit: make sure you create all cpus first! */
-       asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
-       /* Make the SMP bit consistent with the guest configuration */
-       if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
-               actlr |= 1U << 6;
-       else
-               actlr &= ~(1U << 6);
-
-       vcpu->arch.cp15[c1_ACTLR] = actlr;
-}
-
-/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */
-static bool access_l2ectlr(struct kvm_vcpu *vcpu,
-                          const struct coproc_params *p,
-                          const struct coproc_reg *r)
-{
-       if (p->is_write)
-               return ignore_write(vcpu, p);
-
-       *vcpu_reg(vcpu, p->Rt1) = 0;
-       return true;
-}
-
 /*
  * A15-specific CP15 registers.
  * CRn denotes the primary register number, but is copied to the CRm in the
@@ -121,29 +32,9 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,
  *            registers preceding 32-bit ones.
  */
 static const struct coproc_reg a15_regs[] = {
-       /* MPIDR: we use VMPIDR for guest access. */
-       { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
-                       NULL, reset_mpidr, c0_MPIDR },
-
        /* SCTLR: swapped by interrupt.S. */
        { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
                        NULL, reset_val, c1_SCTLR, 0x00C50078 },
-       /* ACTLR: trapped by HCR.TAC bit. */
-       { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
-                       access_actlr, reset_actlr, c1_ACTLR },
-       /* CPACR: swapped by interrupt.S. */
-       { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
-                       NULL, reset_val, c1_CPACR, 0x00000000 },
-
-       /*
-        * L2CTLR access (guest wants to know #CPUs).
-        */
-       { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
-                       access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
-       { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
-
-       /* The Configuration Base Address Register. */
-       { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
 };
 
 static struct kvm_coproc_target_table a15_target_table = {
@@ -154,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = {
 
 static int __init coproc_a15_init(void)
 {
-       unsigned int i;
-
-       for (i = 1; i < ARRAY_SIZE(a15_regs); i++)
-               BUG_ON(cmp_reg(&a15_regs[i-1],
-                              &a15_regs[i]) >= 0);
-
        kvm_register_target_coproc_table(&a15_target_table);
        return 0;
 }
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
new file mode 100644 (file)
index 0000000..1df7673
--- /dev/null
+++ b/arch/arm/kvm/coproc_a7.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Copyright (C) 2013 - ARM Ltd
+ *
+ * Authors: Rusty Russell <rusty@rustcorp.au>
+ *          Christoffer Dall <c.dall@virtualopensystems.com>
+ *          Jonathan Austin <jonathan.austin@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/kvm_host.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
+#include <linux/init.h>
+
+#include "coproc.h"
+
+/*
+ * Cortex-A7 specific CP15 registers.
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRm, Op1, Op2 and with 64-bit
+ *            registers preceding 32-bit ones.
+ */
+static const struct coproc_reg a7_regs[] = {
+       /* SCTLR: swapped by interrupt.S. */
+       { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
+                       NULL, reset_val, c1_SCTLR, 0x00C50878 },
+};
+
+static struct kvm_coproc_target_table a7_target_table = {
+       .target = KVM_ARM_TARGET_CORTEX_A7,
+       .table = a7_regs,
+       .num = ARRAY_SIZE(a7_regs),
+};
+
+static int __init coproc_a7_init(void)
+{
+       kvm_register_target_coproc_table(&a7_target_table);
+       return 0;
+}
+late_initcall(coproc_a7_init);
index bdede9e7da516a43b5a3d681850727860f0534ab..d6c005283678fe5061a50cc8f5efd1febcc0f27b 100644 (file)
@@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
        *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
 
        if (is_pabt) {
-               /* Set DFAR and DFSR */
+               /* Set IFAR and IFSR */
                vcpu->arch.cp15[c6_IFAR] = addr;
                is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
                /* Always give debug fault for now - should give guest a clue */
index 152d03612181d16d5fef5e1e84da8d2c178fbf58..20f8d97904afafc4ec9814b9d74479a7233088ab 100644 (file)
@@ -190,6 +190,8 @@ int __attribute_const__ kvm_target_cpu(void)
                return -EINVAL;
 
        switch (part_number) {
+       case ARM_CPU_PART_CORTEX_A7:
+               return KVM_ARM_TARGET_CORTEX_A7;
        case ARM_CPU_PART_CORTEX_A15:
                return KVM_ARM_TARGET_CORTEX_A15;
        default:
@@ -202,7 +204,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 {
        unsigned int i;
 
-       /* We can only do a cortex A15 for now. */
+       /* We can only cope with guest==host and only on A15/A7 (for now). */
        if (init->target != kvm_target_cpu())
                return -EINVAL;
 
@@ -222,6 +224,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
        return kvm_reset_vcpu(vcpu);
 }
 
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+       int target = kvm_target_cpu();
+
+       if (target < 0)
+               return -ENODEV;
+
+       memset(init, 0, sizeof(*init));
+
+       /*
+        * For now, we don't return any features.
+        * In future, we might use features to return target
+        * specific features available for the preferred
+        * target type.
+        */
+       init->target = (__u32)target;
+
+       return 0;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
        return -EINVAL;
index c02ba4af599f417113fdb2c260270ae7162575e6..d153e64d125505c9a8623521053fbe507db83a2f 100644 (file)
 #include <kvm/arm_arch_timer.h>
 
 /******************************************************************************
- * Cortex-A15 Reset Values
+ * Cortex-A15 and Cortex-A7 Reset Values
  */
 
-static const int a15_max_cpu_idx = 3;
+static const int cortexa_max_cpu_idx = 3;
 
-static struct kvm_regs a15_regs_reset = {
+static struct kvm_regs cortexa_regs_reset = {
        .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
 };
 
-static const struct kvm_irq_level a15_vtimer_irq = {
+static const struct kvm_irq_level cortexa_vtimer_irq = {
        { .irq = 27 },
        .level = 1,
 };
@@ -62,12 +62,13 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
        const struct kvm_irq_level *cpu_vtimer_irq;
 
        switch (vcpu->arch.target) {
+       case KVM_ARM_TARGET_CORTEX_A7:
        case KVM_ARM_TARGET_CORTEX_A15:
-               if (vcpu->vcpu_id > a15_max_cpu_idx)
+               if (vcpu->vcpu_id > cortexa_max_cpu_idx)
                        return -EINVAL;
-               reset_regs = &a15_regs_reset;
+               reset_regs = &cortexa_regs_reset;
                vcpu->arch.midr = read_cpuid_id();
-               cpu_vtimer_irq = &a15_vtimer_irq;
+               cpu_vtimer_irq = &cortexa_vtimer_irq;
                break;
        default:
                return -ENODEV;
index 0859a4ddd1e7d0e8b1792416b19a8f9908457af7..5d85a02d1231e9b6a3738e48cee574e19f987c6b 100644 (file)
 
 #define KVM_VCPU_MAX_FEATURES 2
 
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-#define KVM_NR_PAGE_SIZES      1
-#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
-
 struct kvm_vcpu;
 int kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -151,6 +146,7 @@ struct kvm_vcpu_stat {
 struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
                        const struct kvm_vcpu_init *init);
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 struct kvm_one_reg;
index 2c3ff67a8ecbef4625769f53811bfb9958083d1a..3f0731e53274c92b540a2eab2d0f154005d815f4 100644 (file)
@@ -248,6 +248,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
        return kvm_reset_vcpu(vcpu);
 }
 
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+       int target = kvm_target_cpu();
+
+       if (target < 0)
+               return -ENODEV;
+
+       memset(init, 0, sizeof(*init));
+
+       /*
+        * For now, we don't return any features.
+        * In future, we might use features to return target
+        * specific features available for the preferred
+        * target type.
+        */
+       init->target = (__u32)target;
+
+       return 0;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
        return -EINVAL;
index 989dd3fe8de19d9fc40de248f5788f359eb3ebc6..95a3ff93777c0658729138da3404c03adbc59028 100644 (file)
@@ -234,10 +234,6 @@ struct kvm_vm_data {
 #define KVM_REQ_PTC_G          32
 #define KVM_REQ_RESUME         33
 
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-#define KVM_NR_PAGE_SIZES      1
-#define KVM_PAGES_PER_HPAGE(x) 1
-
 struct kvm;
 struct kvm_vcpu;
 
index 4d6fa0bf1305d7376c7ae2ce2201fc412a8791de..32966969f2f975f56bddbf9ea0aa514d8f435d97 100644 (file)
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
-/* Don't support huge pages */
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-
-/* We don't currently support large pages. */
-#define KVM_NR_PAGE_SIZES      1
-#define KVM_PAGES_PER_HPAGE(x) 1
-
 
 
 /* Special address that contains the comm page, used for reducing # of traps */
index 33283532e9d8fd8c14bbd376b3112136a134e233..0866230b7c2dcda8f30660f42759876bd9e980a6 100644 (file)
@@ -63,11 +63,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
 #endif
 
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-#define KVM_NR_PAGE_SIZES      1
-#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
-
 #define HPTEG_CACHE_NUM                        (1 << 15)
 #define HPTEG_HASH_BITS_PTE            13
 #define HPTEG_HASH_BITS_PTE_LONG       12
index e87ecaa2c569860f0c9353fc3ed398a0eba034ad..d5bc3750616ebae0c2fedac6be4587238577b3ca 100644 (file)
@@ -38,13 +38,6 @@ struct sca_block {
        struct sca_entry cpu[64];
 } __attribute__((packed));
 
-#define KVM_NR_PAGE_SIZES 2
-#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 8)
-#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
-#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
-#define KVM_HPAGE_MASK(x)      (~(KVM_HPAGE_SIZE(x) - 1))
-#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
-
 #define CPUSTAT_STOPPED    0x80000000
 #define CPUSTAT_WAIT       0x10000000
 #define CPUSTAT_ECALL_PEND 0x08000000
@@ -220,7 +213,6 @@ struct kvm_s390_interrupt_info {
 /* for local_interrupt.action_flags */
 #define ACTION_STORE_ON_STOP           (1<<0)
 #define ACTION_STOP_ON_STOP            (1<<1)
-#define ACTION_RELOADVCPU_ON_STOP      (1<<2)
 
 struct kvm_s390_local_interrupt {
        spinlock_t lock;
index 3a74d8af0d69dd2e3bac77a427a6bfd64723bae9..78d967f180f4adc088aea609df1cc664ad57b917 100644 (file)
@@ -107,14 +107,13 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
 
 static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 {
-       int ret, idx;
+       int ret;
 
        /* No virtio-ccw notification? Get out quickly. */
        if (!vcpu->kvm->arch.css_support ||
            (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
                return -EOPNOTSUPP;
 
-       idx = srcu_read_lock(&vcpu->kvm->srcu);
        /*
         * The layout is as follows:
         * - gpr 2 contains the subchannel id (passed as addr)
@@ -125,7 +124,6 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
                                      vcpu->run->s.regs.gprs[2],
                                      8, &vcpu->run->s.regs.gprs[3],
                                      vcpu->run->s.regs.gprs[4]);
-       srcu_read_unlock(&vcpu->kvm->srcu, idx);
 
        /*
         * Return cookie in gpr 2, but don't overwrite the register if the
index 99d789e8a0189c0ab319d775b1c54beba0dac05b..374a439ccc6080a004c7593f6227bc0c799ff7a6 100644 (file)
 #include <asm/uaccess.h>
 #include "kvm-s390.h"
 
+/* Convert real to absolute address by applying the prefix of the CPU */
+static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
+                                                unsigned long gaddr)
+{
+       unsigned long prefix  = vcpu->arch.sie_block->prefix;
+       if (gaddr < 2 * PAGE_SIZE)
+               gaddr += prefix;
+       else if (gaddr >= prefix && gaddr < prefix + 2 * PAGE_SIZE)
+               gaddr -= prefix;
+       return gaddr;
+}
+
 static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
                                          void __user *gptr,
                                          int prefixing)
 {
-       unsigned long prefix  = vcpu->arch.sie_block->prefix;
        unsigned long gaddr = (unsigned long) gptr;
        unsigned long uaddr;
 
-       if (prefixing) {
-               if (gaddr < 2 * PAGE_SIZE)
-                       gaddr += prefix;
-               else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE))
-                       gaddr -= prefix;
-       }
+       if (prefixing)
+               gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
        uaddr = gmap_fault(gaddr, vcpu->arch.gmap);
        if (IS_ERR_VALUE(uaddr))
                uaddr = -EFAULT;
index 5ee56e5acc2396f594854dd8a3e0e99bc3b72720..5ddbbde6f65c32fde299b9b34adbddfb02c5886d 100644 (file)
@@ -62,12 +62,6 @@ static int handle_stop(struct kvm_vcpu *vcpu)
 
        trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
 
-       if (vcpu->arch.local_int.action_bits & ACTION_RELOADVCPU_ON_STOP) {
-               vcpu->arch.local_int.action_bits &= ~ACTION_RELOADVCPU_ON_STOP;
-               rc = SIE_INTERCEPT_RERUNVCPU;
-               vcpu->run->exit_reason = KVM_EXIT_INTR;
-       }
-
        if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
                atomic_set_mask(CPUSTAT_STOPPED,
                                &vcpu->arch.sie_block->cpuflags);
index 7f1f7ac5cf7f8a2c3f3966d4fe96fa23af90ea04..5f79d2d79ca76f34648677bb3514802458882b81 100644 (file)
@@ -436,6 +436,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
        hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
        VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
 no_timer:
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        spin_lock(&vcpu->arch.local_int.float_int->lock);
        spin_lock_bh(&vcpu->arch.local_int.lock);
        add_wait_queue(&vcpu->wq, &wait);
@@ -455,6 +456,8 @@ no_timer:
        remove_wait_queue(&vcpu->wq, &wait);
        spin_unlock_bh(&vcpu->arch.local_int.lock);
        spin_unlock(&vcpu->arch.local_int.float_int->lock);
+       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
        hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
        return 0;
 }
index ed8064cb5c4921424d5981b890e6fd9b07f9ed02..2d67b3bbf1906d4a0f33d72c5e31344c94f2feab 100644 (file)
@@ -695,9 +695,9 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-static int __vcpu_run(struct kvm_vcpu *vcpu)
+static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 {
-       int rc;
+       int rc, cpuflags;
 
        memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
 
@@ -715,28 +715,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                return rc;
 
        vcpu->arch.sie_block->icptcode = 0;
-       VCPU_EVENT(vcpu, 6, "entering sie flags %x",
-                  atomic_read(&vcpu->arch.sie_block->cpuflags));
-       trace_kvm_s390_sie_enter(vcpu,
-                                atomic_read(&vcpu->arch.sie_block->cpuflags));
+       cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
+       VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
+       trace_kvm_s390_sie_enter(vcpu, cpuflags);
 
-       /*
-        * As PF_VCPU will be used in fault handler, between guest_enter
-        * and guest_exit should be no uaccess.
-        */
-       preempt_disable();
-       kvm_guest_enter();
-       preempt_enable();
-       rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
-       kvm_guest_exit();
+       return 0;
+}
+
+static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
+{
+       int rc;
 
        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
                   vcpu->arch.sie_block->icptcode);
        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
 
-       if (rc > 0)
+       if (exit_reason >= 0) {
                rc = 0;
-       if (rc < 0) {
+       } else {
                if (kvm_is_ucontrol(vcpu->kvm)) {
                        rc = SIE_INTERCEPT_UCONTROL;
                } else {
@@ -747,6 +743,49 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
        }
 
        memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
+
+       if (rc == 0) {
+               if (kvm_is_ucontrol(vcpu->kvm))
+                       rc = -EOPNOTSUPP;
+               else
+                       rc = kvm_handle_sie_intercept(vcpu);
+       }
+
+       return rc;
+}
+
+static int __vcpu_run(struct kvm_vcpu *vcpu)
+{
+       int rc, exit_reason;
+
+       /*
+        * We try to hold kvm->srcu during most of vcpu_run (except when run-
+        * ning the guest), so that memslots (and other stuff) are protected
+        */
+       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+       do {
+               rc = vcpu_pre_run(vcpu);
+               if (rc)
+                       break;
+
+               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+               /*
+                * As PF_VCPU will be used in fault handler, between
+                * guest_enter and guest_exit should be no uaccess.
+                */
+               preempt_disable();
+               kvm_guest_enter();
+               preempt_enable();
+               exit_reason = sie64a(vcpu->arch.sie_block,
+                                    vcpu->run->s.regs.gprs);
+               kvm_guest_exit();
+               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+               rc = vcpu_post_run(vcpu, exit_reason);
+       } while (!signal_pending(current) && !rc);
+
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        return rc;
 }
 
@@ -755,7 +794,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        int rc;
        sigset_t sigsaved;
 
-rerun_vcpu:
        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
@@ -788,19 +826,7 @@ rerun_vcpu:
        }
 
        might_fault();
-
-       do {
-               rc = __vcpu_run(vcpu);
-               if (rc)
-                       break;
-               if (kvm_is_ucontrol(vcpu->kvm))
-                       rc = -EOPNOTSUPP;
-               else
-                       rc = kvm_handle_sie_intercept(vcpu);
-       } while (!signal_pending(current) && !rc);
-
-       if (rc == SIE_INTERCEPT_RERUNVCPU)
-               goto rerun_vcpu;
+       rc = __vcpu_run(vcpu);
 
        if (signal_pending(current) && !rc) {
                kvm_run->exit_reason = KVM_EXIT_INTR;
@@ -958,6 +984,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 {
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;
+       int idx;
        long r;
 
        switch (ioctl) {
@@ -971,7 +998,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                break;
        }
        case KVM_S390_STORE_STATUS:
+               idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = kvm_s390_vcpu_store_status(vcpu, arg);
+               srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
        case KVM_S390_SET_INITIAL_PSW: {
                psw_t psw;
index dc99f1ca42678768e5e0241cbeaa20de4e0aae87..b44912a3294941b8df9e9ca27d140ce52149408d 100644 (file)
@@ -28,8 +28,7 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 extern unsigned long *vfacilities;
 
 /* negative values are error codes, positive values for internal conditions */
-#define SIE_INTERCEPT_RERUNVCPU                (1<<0)
-#define SIE_INTERCEPT_UCONTROL         (1<<1)
+#define SIE_INTERCEPT_UCONTROL         (1<<0)
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
 
 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
@@ -91,8 +90,10 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
 
 static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
 {
-       *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20;
-       *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
+       if (r1)
+               *r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20;
+       if (r2)
+               *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
 }
 
 static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
index 59200ee275e568ae99d593484b5575be320b79b8..2440602e6df1e19ab442b3eafd6f18b6342792ec 100644 (file)
 #include "kvm-s390.h"
 #include "trace.h"
 
+/* Handle SCK (SET CLOCK) interception */
+static int handle_set_clock(struct kvm_vcpu *vcpu)
+{
+       struct kvm_vcpu *cpup;
+       s64 hostclk, val;
+       u64 op2;
+       int i;
+
+       if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+               return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+       op2 = kvm_s390_get_base_disp_s(vcpu);
+       if (op2 & 7)    /* Operand must be on a doubleword boundary */
+               return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+       if (get_guest(vcpu, val, (u64 __user *) op2))
+               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+       if (store_tod_clock(&hostclk)) {
+               kvm_s390_set_psw_cc(vcpu, 3);
+               return 0;
+       }
+       val = (val - hostclk) & ~0x3fUL;
+
+       mutex_lock(&vcpu->kvm->lock);
+       kvm_for_each_vcpu(i, cpup, vcpu->kvm)
+               cpup->arch.sie_block->epoch = val;
+       mutex_unlock(&vcpu->kvm->lock);
+
+       kvm_s390_set_psw_cc(vcpu, 0);
+       return 0;
+}
+
 static int handle_set_prefix(struct kvm_vcpu *vcpu)
 {
        u64 operand2;
@@ -128,6 +160,33 @@ static int handle_skey(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+static int handle_test_block(struct kvm_vcpu *vcpu)
+{
+       unsigned long hva;
+       gpa_t addr;
+       int reg2;
+
+       if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+               return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+       kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
+       addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+       addr = kvm_s390_real_to_abs(vcpu, addr);
+
+       hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
+       if (kvm_is_error_hva(hva))
+               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+       /*
+        * We don't expect errors on modern systems, and do not care
+        * about storage keys (yet), so let's just clear the page.
+        */
+       if (clear_user((void __user *)hva, PAGE_SIZE) != 0)
+               return -EFAULT;
+       kvm_s390_set_psw_cc(vcpu, 0);
+       vcpu->run->s.regs.gprs[0] = 0;
+       return 0;
+}
+
 static int handle_tpi(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_interrupt_info *inti;
@@ -438,12 +497,14 @@ out_exception:
 
 static const intercept_handler_t b2_handlers[256] = {
        [0x02] = handle_stidp,
+       [0x04] = handle_set_clock,
        [0x10] = handle_set_prefix,
        [0x11] = handle_store_prefix,
        [0x12] = handle_store_cpu_address,
        [0x29] = handle_skey,
        [0x2a] = handle_skey,
        [0x2b] = handle_skey,
+       [0x2c] = handle_test_block,
        [0x30] = handle_io_inst,
        [0x31] = handle_io_inst,
        [0x32] = handle_io_inst,
index c76ff74a98f2ed5ffcd72b4dcefbcb0c9203c0cd..5cbf3166257c1003809702b817cc5f32868f13c0 100644 (file)
 #define KVM_HPAGE_MASK(x)      (~(KVM_HPAGE_SIZE(x) - 1))
 #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
+static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+{
+       /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+       return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
+               (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+}
+
 #define SELECTOR_TI_MASK (1 << 2)
 #define SELECTOR_RPL_MASK 0x03
 
@@ -253,7 +260,6 @@ struct kvm_pio_request {
  * mode.
  */
 struct kvm_mmu {
-       void (*new_cr3)(struct kvm_vcpu *vcpu);
        void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
        unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
        u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
@@ -261,7 +267,6 @@ struct kvm_mmu {
                          bool prefault);
        void (*inject_page_fault)(struct kvm_vcpu *vcpu,
                                  struct x86_exception *fault);
-       void (*free)(struct kvm_vcpu *vcpu);
        gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
                            struct x86_exception *exception);
        gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
@@ -389,6 +394,8 @@ struct kvm_vcpu_arch {
 
        struct fpu guest_fpu;
        u64 xcr0;
+       u64 guest_supported_xcr0;
+       u32 guest_xstate_size;
 
        struct kvm_pio_request pio;
        void *pio_data;
@@ -780,11 +787,11 @@ void kvm_mmu_module_exit(void);
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
-int kvm_mmu_setup(struct kvm_vcpu *vcpu);
+void kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
                u64 dirty_mask, u64 nx_mask, u64 x_mask);
 
-int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
 void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
                                     struct kvm_memory_slot *slot,
@@ -922,6 +929,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
                       void *insn, int insn_len);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
 
 void kvm_enable_tdp(void);
 void kvm_disable_tdp(void);
index 940ed3fd889a743732891945b191f34cddc8e217..37813b5ddc37472dba6c64b8ff3f2508dc085de0 100644 (file)
 
 /* MSR_IA32_VMX_MISC bits */
 #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE   0x1F
 /* AMD-V MSRs */
 
 #define MSR_VM_CR                       0xc0010114
index b110fe6c03d43908146d05ad689937d3bd991bb9..0a1e3b8b964de8e42ebbc41adbd3c67a8c938dd4 100644 (file)
 #include "mmu.h"
 #include "trace.h"
 
+static u32 xstate_required_size(u64 xstate_bv)
+{
+       int feature_bit = 0;
+       u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+
+       xstate_bv &= ~XSTATE_FPSSE;
+       while (xstate_bv) {
+               if (xstate_bv & 0x1) {
+                       u32 eax, ebx, ecx, edx;
+                       cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
+                       ret = max(ret, eax + ebx);
+               }
+
+               xstate_bv >>= 1;
+               feature_bit++;
+       }
+
+       return ret;
+}
+
 void kvm_update_cpuid(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpuid_entry2 *best;
@@ -46,6 +66,18 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
                        apic->lapic_timer.timer_mode_mask = 1 << 17;
        }
 
+       best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
+       if (!best) {
+               vcpu->arch.guest_supported_xcr0 = 0;
+               vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+       } else {
+               vcpu->arch.guest_supported_xcr0 =
+                       (best->eax | ((u64)best->edx << 32)) &
+                       host_xcr0 & KVM_SUPPORTED_XCR0;
+               vcpu->arch.guest_xstate_size =
+                       xstate_required_size(vcpu->arch.guest_supported_xcr0);
+       }
+
        kvm_pmu_cpuid_update(vcpu);
 }
 
@@ -182,7 +214,7 @@ static bool supported_xcr0_bit(unsigned bit)
 {
        u64 mask = ((u64)1 << bit);
 
-       return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0;
+       return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
 }
 
 #define F(x) bit(X86_FEATURE_##x)
@@ -383,6 +415,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
        case 0xd: {
                int idx, i;
 
+               entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
+               entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                for (idx = 1, i = 1; idx < 64; ++idx) {
                        if (*nent >= maxnent)
index dce0df8150df23709607ca05ef4d09cb867cc035..40772ef0f2b12458f21094d9a642a487a081fdde 100644 (file)
@@ -2570,11 +2570,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
        kvm_release_pfn_clean(pfn);
 }
 
-static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
-{
-       mmu_free_roots(vcpu);
-}
-
 static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
                                     bool no_dirty_log)
 {
@@ -3424,18 +3419,11 @@ out_unlock:
        return 0;
 }
 
-static void nonpaging_free(struct kvm_vcpu *vcpu)
-{
-       mmu_free_roots(vcpu);
-}
-
-static int nonpaging_init_context(struct kvm_vcpu *vcpu,
-                                 struct kvm_mmu *context)
+static void nonpaging_init_context(struct kvm_vcpu *vcpu,
+                                  struct kvm_mmu *context)
 {
-       context->new_cr3 = nonpaging_new_cr3;
        context->page_fault = nonpaging_page_fault;
        context->gva_to_gpa = nonpaging_gva_to_gpa;
-       context->free = nonpaging_free;
        context->sync_page = nonpaging_sync_page;
        context->invlpg = nonpaging_invlpg;
        context->update_pte = nonpaging_update_pte;
@@ -3444,7 +3432,6 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu,
        context->root_hpa = INVALID_PAGE;
        context->direct_map = true;
        context->nx = false;
-       return 0;
 }
 
 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
@@ -3454,9 +3441,8 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);
 
-static void paging_new_cr3(struct kvm_vcpu *vcpu)
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
 {
-       pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu));
        mmu_free_roots(vcpu);
 }
 
@@ -3471,11 +3457,6 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
        vcpu->arch.mmu.inject_page_fault(vcpu, fault);
 }
 
-static void paging_free(struct kvm_vcpu *vcpu)
-{
-       nonpaging_free(vcpu);
-}
-
 static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
                           unsigned access, int *nr_present)
 {
@@ -3665,9 +3646,9 @@ static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
        mmu->last_pte_bitmap = map;
 }
 
-static int paging64_init_context_common(struct kvm_vcpu *vcpu,
-                                       struct kvm_mmu *context,
-                                       int level)
+static void paging64_init_context_common(struct kvm_vcpu *vcpu,
+                                        struct kvm_mmu *context,
+                                        int level)
 {
        context->nx = is_nx(vcpu);
        context->root_level = level;
@@ -3677,27 +3658,24 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
        update_last_pte_bitmap(vcpu, context);
 
        ASSERT(is_pae(vcpu));
-       context->new_cr3 = paging_new_cr3;
        context->page_fault = paging64_page_fault;
        context->gva_to_gpa = paging64_gva_to_gpa;
        context->sync_page = paging64_sync_page;
        context->invlpg = paging64_invlpg;
        context->update_pte = paging64_update_pte;
-       context->free = paging_free;
        context->shadow_root_level = level;
        context->root_hpa = INVALID_PAGE;
        context->direct_map = false;
-       return 0;
 }
 
-static int paging64_init_context(struct kvm_vcpu *vcpu,
-                                struct kvm_mmu *context)
+static void paging64_init_context(struct kvm_vcpu *vcpu,
+                                 struct kvm_mmu *context)
 {
-       return paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
+       paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
 }
 
-static int paging32_init_context(struct kvm_vcpu *vcpu,
-                                struct kvm_mmu *context)
+static void paging32_init_context(struct kvm_vcpu *vcpu,
+                                 struct kvm_mmu *context)
 {
        context->nx = false;
        context->root_level = PT32_ROOT_LEVEL;
@@ -3706,33 +3684,28 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
        update_permission_bitmask(vcpu, context, false);
        update_last_pte_bitmap(vcpu, context);
 
-       context->new_cr3 = paging_new_cr3;
        context->page_fault = paging32_page_fault;
        context->gva_to_gpa = paging32_gva_to_gpa;
-       context->free = paging_free;
        context->sync_page = paging32_sync_page;
        context->invlpg = paging32_invlpg;
        context->update_pte = paging32_update_pte;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
        context->root_hpa = INVALID_PAGE;
        context->direct_map = false;
-       return 0;
 }
 
-static int paging32E_init_context(struct kvm_vcpu *vcpu,
-                                 struct kvm_mmu *context)
+static void paging32E_init_context(struct kvm_vcpu *vcpu,
+                                  struct kvm_mmu *context)
 {
-       return paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
+       paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
 }
 
-static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 {
        struct kvm_mmu *context = vcpu->arch.walk_mmu;
 
        context->base_role.word = 0;
-       context->new_cr3 = nonpaging_new_cr3;
        context->page_fault = tdp_page_fault;
-       context->free = nonpaging_free;
        context->sync_page = nonpaging_sync_page;
        context->invlpg = nonpaging_invlpg;
        context->update_pte = nonpaging_update_pte;
@@ -3767,37 +3740,32 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 
        update_permission_bitmask(vcpu, context, false);
        update_last_pte_bitmap(vcpu, context);
-
-       return 0;
 }
 
-int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 {
-       int r;
        bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
        if (!is_paging(vcpu))
-               r = nonpaging_init_context(vcpu, context);
+               nonpaging_init_context(vcpu, context);
        else if (is_long_mode(vcpu))
-               r = paging64_init_context(vcpu, context);
+               paging64_init_context(vcpu, context);
        else if (is_pae(vcpu))
-               r = paging32E_init_context(vcpu, context);
+               paging32E_init_context(vcpu, context);
        else
-               r = paging32_init_context(vcpu, context);
+               paging32_init_context(vcpu, context);
 
        vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
        vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
        vcpu->arch.mmu.base_role.cr0_wp  = is_write_protection(vcpu);
        vcpu->arch.mmu.base_role.smep_andnot_wp
                = smep && !is_write_protection(vcpu);
-
-       return r;
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
-int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
                bool execonly)
 {
        ASSERT(vcpu);
@@ -3806,37 +3774,30 @@ int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
        context->shadow_root_level = kvm_x86_ops->get_tdp_level();
 
        context->nx = true;
-       context->new_cr3 = paging_new_cr3;
        context->page_fault = ept_page_fault;
        context->gva_to_gpa = ept_gva_to_gpa;
        context->sync_page = ept_sync_page;
        context->invlpg = ept_invlpg;
        context->update_pte = ept_update_pte;
-       context->free = paging_free;
        context->root_level = context->shadow_root_level;
        context->root_hpa = INVALID_PAGE;
        context->direct_map = false;
 
        update_permission_bitmask(vcpu, context, true);
        reset_rsvds_bits_mask_ept(vcpu, context, execonly);
-
-       return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
 
-static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
+static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
 {
-       int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
-
+       kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
        vcpu->arch.walk_mmu->set_cr3           = kvm_x86_ops->set_cr3;
        vcpu->arch.walk_mmu->get_cr3           = get_cr3;
        vcpu->arch.walk_mmu->get_pdptr         = kvm_pdptr_read;
        vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
-
-       return r;
 }
 
-static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 {
        struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
 
@@ -3873,11 +3834,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 
        update_permission_bitmask(vcpu, g_context, false);
        update_last_pte_bitmap(vcpu, g_context);
-
-       return 0;
 }
 
-static int init_kvm_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_mmu(struct kvm_vcpu *vcpu)
 {
        if (mmu_is_nested(vcpu))
                return init_kvm_nested_mmu(vcpu);
@@ -3887,18 +3846,12 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)
                return init_kvm_softmmu(vcpu);
 }
 
-static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 {
        ASSERT(vcpu);
-       if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
-               /* mmu.free() should set root_hpa = INVALID_PAGE */
-               vcpu->arch.mmu.free(vcpu);
-}
 
-int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
-{
-       destroy_kvm_mmu(vcpu);
-       return init_kvm_mmu(vcpu);
+       kvm_mmu_unload(vcpu);
+       init_kvm_mmu(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
 
@@ -3923,6 +3876,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
        mmu_free_roots(vcpu);
+       WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unload);
 
@@ -4281,12 +4235,12 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
        return alloc_mmu_pages(vcpu);
 }
 
-int kvm_mmu_setup(struct kvm_vcpu *vcpu)
+void kvm_mmu_setup(struct kvm_vcpu *vcpu)
 {
        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
-       return init_kvm_mmu(vcpu);
+       init_kvm_mmu(vcpu);
 }
 
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
@@ -4428,7 +4382,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
        int nr_to_scan = sc->nr_to_scan;
        unsigned long freed = 0;
 
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
 
        list_for_each_entry(kvm, &vm_list, vm_list) {
                int idx;
@@ -4478,9 +4432,8 @@ unlock:
                break;
        }
 
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        return freed;
-
 }
 
 static unsigned long
@@ -4574,7 +4527,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
        ASSERT(vcpu);
 
-       destroy_kvm_mmu(vcpu);
+       kvm_mmu_unload(vcpu);
        free_mmu_pages(vcpu);
        mmu_free_memory_caches(vcpu);
 }
index 77e044a0f5f70f36222510cf8012ecb9607da65e..292615274358ee33a1afeb1eef159901ae6f164e 100644 (file)
@@ -70,8 +70,8 @@ enum {
 };
 
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
-int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
-int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
                bool execonly);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
index c0bc80391e40a9bd1dfbf97243160ddcef61f145..c7168a5cff1b33ad454cc85176695cb0c7394dfc 100644 (file)
@@ -1959,11 +1959,9 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
        nested_svm_vmexit(svm);
 }
 
-static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
+static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
 {
-       int r;
-
-       r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
+       kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
 
        vcpu->arch.mmu.set_cr3           = nested_svm_set_tdp_cr3;
        vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
@@ -1971,8 +1969,6 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
        vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
        vcpu->arch.mmu.shadow_root_level = get_npt_level();
        vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
-
-       return r;
 }
 
 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
index 2b2fce1b200900b1af42865f946d5faa25fdc56a..06fd7629068ac6ddd3adde76dbec5aff7d39bf29 100644 (file)
@@ -1898,16 +1898,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 /*
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
- * This function assumes it is called with the exit reason in vmcs02 being
- * a #PF exception (this is the only case in which KVM injects a #PF when L2
- * is running).
  */
-static int nested_pf_handled(struct kvm_vcpu *vcpu)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
 {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
-       /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
-       if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
+       if (!(vmcs12->exception_bitmap & (1u << nr)))
                return 0;
 
        nested_vmx_vmexit(vcpu);
@@ -1921,8 +1917,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
-       if (nr == PF_VECTOR && is_guest_mode(vcpu) &&
-           !vmx->nested.nested_run_pending && nested_pf_handled(vcpu))
+       if (!reinject && is_guest_mode(vcpu) &&
+           nested_vmx_check_exception(vcpu, nr))
                return;
 
        if (has_error_code) {
@@ -2204,9 +2200,15 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 #ifdef CONFIG_X86_64
                VM_EXIT_HOST_ADDR_SPACE_SIZE |
 #endif
-               VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+               VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+               VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+       if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+           !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
+               nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+               nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+       }
        nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
-                                     VM_EXIT_LOAD_IA32_EFER);
+               VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
 
        /* entry controls */
        rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2252,6 +2254,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
        nested_vmx_secondary_ctls_low = 0;
        nested_vmx_secondary_ctls_high &=
                SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+               SECONDARY_EXEC_UNRESTRICTED_GUEST |
                SECONDARY_EXEC_WBINVD_EXITING;
 
        if (enable_ept) {
@@ -3380,8 +3383,10 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
        if (enable_ept) {
                eptp = construct_eptp(cr3);
                vmcs_write64(EPT_POINTER, eptp);
-               guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) :
-                       vcpu->kvm->arch.ept_identity_map_addr;
+               if (is_paging(vcpu) || is_guest_mode(vcpu))
+                       guest_cr3 = kvm_read_cr3(vcpu);
+               else
+                       guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr;
                ept_load_pdptrs(vcpu);
        }
 
@@ -4879,6 +4884,17 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
        hypercall[2] = 0xc1;
 }
 
+static bool nested_cr0_valid(struct vmcs12 *vmcs12, unsigned long val)
+{
+       unsigned long always_on = VMXON_CR0_ALWAYSON;
+
+       if (nested_vmx_secondary_ctls_high &
+               SECONDARY_EXEC_UNRESTRICTED_GUEST &&
+           nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
+               always_on &= ~(X86_CR0_PE | X86_CR0_PG);
+       return (val & always_on) == always_on;
+}
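A user-space sketch of the mask arithmetic in nested_cr0_valid(): with unrestricted guest exposed by both L0 and L1, the PE and PG bits drop out of the always-on set, so L2 may legally run in unpaged real mode. The CR0 bit values are architectural; VMXON_CR0_ALWAYSON mirrors the PE|NE|PG definition used by vmx.c:

    #include <stdbool.h>
    #include <stdio.h>

    #define X86_CR0_PE (1UL << 0)
    #define X86_CR0_NE (1UL << 5)
    #define X86_CR0_PG (1UL << 31)
    #define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_NE | X86_CR0_PG)

    static bool cr0_valid(unsigned long val, bool unrestricted_guest)
    {
            unsigned long always_on = VMXON_CR0_ALWAYSON;

            if (unrestricted_guest)
                    always_on &= ~(X86_CR0_PE | X86_CR0_PG);
            return (val & always_on) == always_on;
    }

    int main(void)
    {
            /* CR0 with only NE set: valid only with unrestricted guest */
            printf("%d %d\n", cr0_valid(X86_CR0_NE, false),
                   cr0_valid(X86_CR0_NE, true));       /* prints "0 1" */
            return 0;
    }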
+
 /* called to set cr0 as appropriate for a mov-to-cr0 exit. */
 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
 {
@@ -4897,9 +4913,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
                val = (val & ~vmcs12->cr0_guest_host_mask) |
                        (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
 
-               /* TODO: will have to take unrestricted guest mode into
-                * account */
-               if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)
+               if (!nested_cr0_valid(vmcs12, val))
                        return 1;
 
                if (kvm_set_cr0(vcpu, val))
@@ -6722,6 +6736,27 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
        *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 
+static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
+{
+       u64 delta_tsc_l1;
+       u32 preempt_val_l1, preempt_val_l2, preempt_scale;
+
+       if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
+                       PIN_BASED_VMX_PREEMPTION_TIMER))
+               return;
+       preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
+                       MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
+       preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+       delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
+               - vcpu->arch.last_guest_tsc;
+       preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
+       if (preempt_val_l2 <= preempt_val_l1)
+               preempt_val_l2 = 0;
+       else
+               preempt_val_l2 -= preempt_val_l1;
+       vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
+}
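The adjustment above compensates for time that L0 and L1 consumed while the L2 preemption timer was parked: the timer ticks at the TSC rate right-shifted by a scale read from MSR_IA32_VMX_MISC, so the elapsed L1 TSC delta is converted to timer units and subtracted, clamping at zero to force an immediate exit. A standalone sketch of the arithmetic (values illustrative):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t adjust_preemption_timer(uint32_t preempt_val_l2,
                                            uint64_t delta_tsc_l1,
                                            uint32_t preempt_scale)
    {
            uint32_t preempt_val_l1 = delta_tsc_l1 >> preempt_scale;

            /* timer would already have fired: exit immediately */
            if (preempt_val_l2 <= preempt_val_l1)
                    return 0;
            return preempt_val_l2 - preempt_val_l1;
    }

    int main(void)
    {
            /* 10000 ticks left, 40960 TSC cycles elapsed, scale 5 */
            printf("%u\n", adjust_preemption_timer(10000, 40960, 5));
            /* prints 8720: 10000 - (40960 >> 5) */
            return 0;
    }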
+
 /*
  * The guest has exited.  See if we can fix it or if we need userspace
  * assistance.
@@ -6736,20 +6771,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
        if (vmx->emulation_required)
                return handle_invalid_guest_state(vcpu);
 
-       /*
-        * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
-        * we did not inject a still-pending event to L1 now because of
-        * nested_run_pending, we need to re-enable this bit.
-        */
-       if (vmx->nested.nested_run_pending)
-               kvm_make_request(KVM_REQ_EVENT, vcpu);
-
-       if (!is_guest_mode(vcpu) && (exit_reason == EXIT_REASON_VMLAUNCH ||
-           exit_reason == EXIT_REASON_VMRESUME))
-               vmx->nested.nested_run_pending = 1;
-       else
-               vmx->nested.nested_run_pending = 0;
-
        if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
                nested_vmx_vmexit(vcpu);
                return 1;
@@ -7061,9 +7082,9 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
        case INTR_TYPE_HARD_EXCEPTION:
                if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
                        u32 err = vmcs_read32(error_code_field);
-                       kvm_queue_exception_e(vcpu, vector, err);
+                       kvm_requeue_exception_e(vcpu, vector, err);
                } else
-                       kvm_queue_exception(vcpu, vector);
+                       kvm_requeue_exception(vcpu, vector);
                break;
        case INTR_TYPE_SOFT_INTR:
                vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
@@ -7146,6 +7167,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        atomic_switch_perf_msrs(vmx);
        debugctlmsr = get_debugctlmsr();
 
+       if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
+               nested_adjust_preemption_timer(vcpu);
        vmx->__launched = vmx->loaded_vmcs->launched;
        asm(
                /* Store host registers */
@@ -7284,6 +7307,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
        trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
 
+       /*
+        * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
+        * we did not inject a still-pending event to L1 now because of
+        * nested_run_pending, we need to re-enable this bit.
+        */
+       if (vmx->nested.nested_run_pending)
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+       vmx->nested.nested_run_pending = 0;
+
        vmx_complete_atomic_exit(vmx);
        vmx_recover_nmi_blocking(vmx);
        vmx_complete_interrupts(vmx);
@@ -7501,9 +7534,9 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
        return get_vmcs12(vcpu)->ept_pointer;
 }
 
-static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
+static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 {
-       int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
+       kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
                        nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT);
 
        vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
@@ -7511,8 +7544,6 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
        vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
 
        vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
-
-       return r;
 }
 
 static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
@@ -7520,6 +7551,20 @@ static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
        vcpu->arch.walk_mmu = &vcpu->arch.mmu;
 }
 
+static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
+               struct x86_exception *fault)
+{
+       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+       WARN_ON(!is_guest_mode(vcpu));
+
+       /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
+       if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
+               nested_vmx_vmexit(vcpu);
+       else
+               kvm_inject_page_fault(vcpu, fault);
+}
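Both this helper and nested_vmx_check_exception() above route on the same test: an exception with vector nr is reflected to L1 iff L1 set the matching bit in vmcs12->exception_bitmap (the PFEC match/mask refinement is still a TODO). A reduced sketch of the routing, with vmcs12 cut down to the one relevant field; PF_VECTOR is the architectural #PF vector 14:

    #include <stdint.h>
    #include <stdio.h>

    #define PF_VECTOR 14

    struct vmcs12 { uint32_t exception_bitmap; };

    static const char *route_exception(const struct vmcs12 *vmcs12,
                                       unsigned int nr)
    {
            if (vmcs12->exception_bitmap & (1u << nr))
                    return "vmexit to L1";
            return "inject into L2";
    }

    int main(void)
    {
            struct vmcs12 v = { .exception_bitmap = 1u << PF_VECTOR };

            printf("#PF: %s\n", route_exception(&v, PF_VECTOR));
            printf("#UD: %s\n", route_exception(&v, 6));
            return 0;
    }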
+
 /*
  * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
  * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -7533,6 +7578,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 exec_control;
+       u32 exit_control;
 
        vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
        vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7706,7 +7752,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
         * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
         * bits are further modified by vmx_set_efer() below.
         */
-       vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+       exit_control = vmcs_config.vmexit_ctrl;
+       if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
+               exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+       vmcs_write32(VM_EXIT_CONTROLS, exit_control);
 
        /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
         * emulated by vmx_set_efer(), below.
@@ -7773,6 +7822,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        kvm_set_cr3(vcpu, vmcs12->guest_cr3);
        kvm_mmu_reset_context(vcpu);
 
+       if (!enable_ept)
+               vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
+
        /*
         * L1 may access the L2's PDPTR, so save them to construct vmcs12
         */
@@ -7876,7 +7928,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                return 1;
        }
 
-       if (((vmcs12->guest_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
+       if (!nested_cr0_valid(vmcs12, vmcs12->guest_cr0) ||
            ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
                nested_vmx_entry_failure(vcpu, vmcs12,
                        EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
@@ -7938,6 +7990,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
        enter_guest_mode(vcpu);
 
+       vmx->nested.nested_run_pending = 1;
+
        vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
 
        cpu = get_cpu();
@@ -8005,7 +8059,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
        u32 idt_vectoring;
        unsigned int nr;
 
-       if (vcpu->arch.exception.pending) {
+       if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
                nr = vcpu->arch.exception.nr;
                idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
 
@@ -8105,6 +8159,11 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        vmcs12->guest_pending_dbg_exceptions =
                vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
 
+       if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
+           (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+               vmcs12->vmx_preemption_timer_value =
+                       vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+
        /*
         * In some cases (usually, nested EPT), L2 is allowed to change its
         * own CR3 without exiting. If it has changed it, we must keep it.
@@ -8130,6 +8189,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
        if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
                vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
+       if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
+               vmcs12->guest_ia32_efer = vcpu->arch.efer;
        vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
        vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
        vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
@@ -8201,7 +8262,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
         * fpu_active (which may have changed).
         * Note that vmx_set_cr0 refers to efer set above.
         */
-       kvm_set_cr0(vcpu, vmcs12->host_cr0);
+       vmx_set_cr0(vcpu, vmcs12->host_cr0);
        /*
         * If we did fpu_activate()/fpu_deactivate() during L2's run, we need
         * to apply the same changes to L1's vmcs. We just set cr0 correctly,
@@ -8224,6 +8285,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
        kvm_set_cr3(vcpu, vmcs12->host_cr3);
        kvm_mmu_reset_context(vcpu);
 
+       if (!enable_ept)
+               vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+
        if (enable_vpid) {
                /*
                 * Trivially support vpid by letting L2s share their parent
index e5ca72a5cdb6da13617033ad8c0c65c4391d9e2f..edf2a07df3a3e01ba42b85d2e2dc1d1bc146d11e 100644 (file)
@@ -586,7 +586,7 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
                return 1;
        if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
                return 1;
-       if (xcr0 & ~host_xcr0)
+       if (xcr0 & ~vcpu->arch.guest_supported_xcr0)
                return 1;
        kvm_put_guest_xcr0(vcpu);
        vcpu->arch.xcr0 = xcr0;
@@ -684,7 +684,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 
        vcpu->arch.cr3 = cr3;
        __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
-       vcpu->arch.mmu.new_cr3(vcpu);
+       kvm_mmu_new_cr3(vcpu);
        return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr3);
@@ -2984,11 +2984,13 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
                                         struct kvm_xsave *guest_xsave)
 {
-       if (cpu_has_xsave)
+       if (cpu_has_xsave) {
                memcpy(guest_xsave->region,
                        &vcpu->arch.guest_fpu.state->xsave,
-                       xstate_size);
-       else {
+                       vcpu->arch.guest_xstate_size);
+               *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
+                       vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
+       } else {
                memcpy(guest_xsave->region,
                        &vcpu->arch.guest_fpu.state->fxsave,
                        sizeof(struct i387_fxsave_struct));
@@ -3003,10 +3005,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
        u64 xstate_bv =
                *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
 
-       if (cpu_has_xsave)
+       if (cpu_has_xsave) {
+               /*
+                * Here we allow setting states that are not present in
+                * CPUID leaf 0xD, index 0, EDX:EAX.  This is for compatibility
+                * with old userspace.
+                */
+               if (xstate_bv & ~KVM_SUPPORTED_XCR0)
+                       return -EINVAL;
+               if (xstate_bv & ~host_xcr0)
+                       return -EINVAL;
                memcpy(&vcpu->arch.guest_fpu.state->xsave,
-                       guest_xsave->region, xstate_size);
-       else {
+                       guest_xsave->region, vcpu->arch.guest_xstate_size);
+       } else {
                if (xstate_bv & ~XSTATE_FPSSE)
                        return -EINVAL;
                memcpy(&vcpu->arch.guest_fpu.state->fxsave,
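The set_xsave path now validates the header's xstate_bv instead of blindly copying: for compatibility with old userspace the bits may exceed what CPUID advertised to the guest, but they must stay within what KVM can virtualize (KVM_SUPPORTED_XCR0, defined later in this series as FP|SSE|YMM) and what the host CPU supports (host_xcr0). A sketch of the check:

    #include <stdint.h>
    #include <stdio.h>

    #define XSTATE_FP  (1ULL << 0)
    #define XSTATE_SSE (1ULL << 1)
    #define XSTATE_YMM (1ULL << 2)
    #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)

    static int check_xstate_bv(uint64_t xstate_bv, uint64_t host_xcr0)
    {
            if (xstate_bv & ~KVM_SUPPORTED_XCR0)
                    return -1;      /* feature KVM cannot virtualize */
            if (xstate_bv & ~host_xcr0)
                    return -1;      /* feature the host CPU lacks */
            return 0;
    }

    int main(void)
    {
            uint64_t host_xcr0 = XSTATE_FP | XSTATE_SSE; /* no AVX host */

            printf("%d\n", check_xstate_bv(XSTATE_FP | XSTATE_SSE,
                                           host_xcr0));         /* 0 */
            printf("%d\n", check_xstate_bv(XSTATE_YMM, host_xcr0)); /* -1 */
            return 0;
    }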
@@ -5263,7 +5274,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 
        smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
 
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        if (vcpu->cpu != freq->cpu)
@@ -5273,7 +5284,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
                                send_ipi = 1;
                }
        }
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
 
        if (freq->old < freq->new && send_ipi) {
                /*
@@ -5426,12 +5437,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
        struct kvm_vcpu *vcpu;
        int i;
 
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
                kvm_for_each_vcpu(i, vcpu, kvm)
                        set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
        atomic_set(&kvm_guest_has_master_clock, 0);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
 }
 
 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
@@ -6688,7 +6699,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
        if (r)
                return r;
        kvm_vcpu_reset(vcpu);
-       r = kvm_mmu_setup(vcpu);
+       kvm_mmu_setup(vcpu);
        vcpu_put(vcpu);
 
        return r;
@@ -6940,6 +6951,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
        vcpu->arch.ia32_tsc_adjust_msr = 0x0;
        vcpu->arch.pv_time_enabled = false;
+
+       vcpu->arch.guest_supported_xcr0 = 0;
+       vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+
        kvm_async_pf_hash_reset(vcpu);
        kvm_pmu_init(vcpu);
 
@@ -7283,7 +7298,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
        int r;
 
        if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
-             is_error_page(work->page))
+             work->wakeup_all)
                return;
 
        r = kvm_mmu_reload(vcpu);
@@ -7393,7 +7408,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
        struct x86_exception fault;
 
        trace_kvm_async_pf_ready(work->arch.token, work->gva);
-       if (is_error_page(work->page))
+       if (work->wakeup_all)
                work->arch.token = ~0; /* broadcast wakeup */
        else
                kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
index e224f7a671b66fb23a291cdf25c5eda7f4bdd157..587fb9ede436076050b71592a0eb1ee562dc8517 100644 (file)
@@ -122,6 +122,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
        gva_t addr, void *val, unsigned int bytes,
        struct x86_exception *exception);
 
+#define KVM_SUPPORTED_XCR0     (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
 extern u64 host_xcr0;
 
 extern struct static_key kvm_no_apic_vcpu;
index 0fbbc7aa02cb17c9c7d1cc5b5a17330d58858c10..c9d4236ab442d0ba90f3b96dd520d87f1ec59a7c 100644 (file)
@@ -142,7 +142,7 @@ struct kvm;
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
-extern raw_spinlock_t kvm_lock;
+extern spinlock_t kvm_lock;
 extern struct list_head vm_list;
 
 struct kvm_io_range {
@@ -189,8 +189,7 @@ struct kvm_async_pf {
        gva_t gva;
        unsigned long addr;
        struct kvm_arch_async_pf arch;
-       struct page *page;
-       bool done;
+       bool   wakeup_all;
 };
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
@@ -842,13 +841,6 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
        return gfn_to_memslot(kvm, gfn)->id;
 }
 
-static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
-{
-       /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
-       return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
-               (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
-}
-
 static inline gfn_t
 hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
 {
index 7005d1109ec94c8c2839bbff6984395e20e8a873..131a0bda7aecec634b61ac72078d07d31eb1602c 100644 (file)
@@ -296,23 +296,21 @@ DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
 
 TRACE_EVENT(
        kvm_async_pf_completed,
-       TP_PROTO(unsigned long address, struct page *page, u64 gva),
-       TP_ARGS(address, page, gva),
+       TP_PROTO(unsigned long address, u64 gva),
+       TP_ARGS(address, gva),
 
        TP_STRUCT__entry(
                __field(unsigned long, address)
-               __field(pfn_t, pfn)
                __field(u64, gva)
                ),
 
        TP_fast_assign(
                __entry->address = address;
-               __entry->pfn = page ? page_to_pfn(page) : 0;
                __entry->gva = gva;
                ),
 
-       TP_printk("gva %#llx address %#lx pfn %#llx",  __entry->gva,
-                 __entry->address, __entry->pfn)
+       TP_printk("gva %#llx address %#lx",  __entry->gva,
+                 __entry->address)
 );
 
 #endif
index 99c25338ede88c755698fe892983665decb005bc..e32e776f20c084c9295d02f044d7b17450cb5c73 100644 (file)
@@ -1012,6 +1012,7 @@ struct kvm_s390_ucas_mapping {
 /* VM is being stopped by host */
 #define KVM_KVMCLOCK_CTRL        _IO(KVMIO,   0xad)
 #define KVM_ARM_VCPU_INIT        _IOW(KVMIO,  0xae, struct kvm_vcpu_init)
+#define KVM_ARM_PREFERRED_TARGET  _IOR(KVMIO,  0xaf, struct kvm_vcpu_init)
 #define KVM_GET_REG_LIST         _IOWR(KVMIO, 0xb0, struct kvm_reg_list)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
index 8a39dda7a3254677df5996c07c965a2f9f5ef8a8..8631d9c14320bea69b4e9713013ab54b2e3752be 100644 (file)
@@ -56,7 +56,6 @@ void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
 
 static void async_pf_execute(struct work_struct *work)
 {
-       struct page *page = NULL;
        struct kvm_async_pf *apf =
                container_of(work, struct kvm_async_pf, work);
        struct mm_struct *mm = apf->mm;
@@ -68,14 +67,12 @@ static void async_pf_execute(struct work_struct *work)
 
        use_mm(mm);
        down_read(&mm->mmap_sem);
-       get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
+       get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
        up_read(&mm->mmap_sem);
        unuse_mm(mm);
 
        spin_lock(&vcpu->async_pf.lock);
        list_add_tail(&apf->link, &vcpu->async_pf.done);
-       apf->page = page;
-       apf->done = true;
        spin_unlock(&vcpu->async_pf.lock);
 
        /*
@@ -83,7 +80,7 @@ static void async_pf_execute(struct work_struct *work)
         * this point
         */
 
-       trace_kvm_async_pf_completed(addr, page, gva);
+       trace_kvm_async_pf_completed(addr, gva);
 
        if (waitqueue_active(&vcpu->wq))
                wake_up_interruptible(&vcpu->wq);
@@ -99,9 +96,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
                struct kvm_async_pf *work =
                        list_entry(vcpu->async_pf.queue.next,
                                   typeof(*work), queue);
-               cancel_work_sync(&work->work);
                list_del(&work->queue);
-               if (!work->done) { /* work was canceled */
+               if (cancel_work_sync(&work->work)) {
                        mmdrop(work->mm);
                        kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
                        kmem_cache_free(async_pf_cache, work);
@@ -114,8 +110,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
                        list_entry(vcpu->async_pf.done.next,
                                   typeof(*work), link);
                list_del(&work->link);
-               if (!is_error_page(work->page))
-                       kvm_release_page_clean(work->page);
                kmem_cache_free(async_pf_cache, work);
        }
        spin_unlock(&vcpu->async_pf.lock);
@@ -135,14 +129,11 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
                list_del(&work->link);
                spin_unlock(&vcpu->async_pf.lock);
 
-               if (work->page)
-                       kvm_arch_async_page_ready(vcpu, work);
+               kvm_arch_async_page_ready(vcpu, work);
                kvm_arch_async_page_present(vcpu, work);
 
                list_del(&work->queue);
                vcpu->async_pf.queued--;
-               if (!is_error_page(work->page))
-                       kvm_release_page_clean(work->page);
                kmem_cache_free(async_pf_cache, work);
        }
 }
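With the struct page pointer gone, the completion loop above calls kvm_arch_async_page_ready() unconditionally and leaves the broadcast/real distinction to the arch hooks, which test the new wakeup_all flag (the x86 hunks earlier replace is_error_page(work->page) with exactly that test). A simplified stand-in sketch of the two hooks, not the kernel structures:

    #include <stdbool.h>
    #include <stdio.h>

    struct async_pf { bool wakeup_all; unsigned int token; };

    static void arch_page_ready(struct async_pf *work)
    {
            if (work->wakeup_all)   /* was: is_error_page(work->page) */
                    return;         /* nothing to prefault */
            printf("reload mmu, prefault token %u\n", work->token);
    }

    static void arch_page_present(struct async_pf *work)
    {
            if (work->wakeup_all)
                    work->token = ~0u;      /* broadcast wakeup */
            printf("notify guest, token %#x\n", work->token);
    }

    int main(void)
    {
            struct async_pf real  = { .wakeup_all = false, .token = 42 };
            struct async_pf bcast = { .wakeup_all = true };

            arch_page_ready(&real);
            arch_page_present(&real);
            arch_page_ready(&bcast);
            arch_page_present(&bcast);
            return 0;
    }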
@@ -165,8 +156,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
        if (!work)
                return 0;
 
-       work->page = NULL;
-       work->done = false;
+       work->wakeup_all = false;
        work->vcpu = vcpu;
        work->gva = gva;
        work->addr = gfn_to_hva(vcpu->kvm, gfn);
@@ -206,7 +196,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
        if (!work)
                return -ENOMEM;
 
-       work->page = KVM_ERR_PTR_BAD_PAGE;
+       work->wakeup_all = true;
        INIT_LIST_HEAD(&work->queue); /* for list_del to work */
 
        spin_lock(&vcpu->async_pf.lock);
index 72a130bc448aa97adb1ca35a2dfff09a102b717d..a3b14109049befc4da95f97658427a9bef6bbd30 100644 (file)
@@ -190,11 +190,7 @@ int kvm_assign_device(struct kvm *kvm,
 
        pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
 
-       printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
-               assigned_dev->host_segnr,
-               assigned_dev->host_busnr,
-               PCI_SLOT(assigned_dev->host_devfn),
-               PCI_FUNC(assigned_dev->host_devfn));
+       dev_info(&pdev->dev, "kvm assign device\n");
 
        return 0;
 out_unmap:
@@ -220,11 +216,7 @@ int kvm_deassign_device(struct kvm *kvm,
 
        pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
 
-       printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
-               assigned_dev->host_segnr,
-               assigned_dev->host_busnr,
-               PCI_SLOT(assigned_dev->host_devfn),
-               PCI_FUNC(assigned_dev->host_devfn));
+       dev_info(&pdev->dev, "kvm deassign device\n");
 
        return 0;
 }
index a9dd682cf5e3f5117de017156396337a8352914f..0d20c320a33daa5b20710e2fa0f7c3ab18390d27 100644 (file)
@@ -70,7 +70,8 @@ MODULE_LICENSE("GPL");
  *             kvm->lock --> kvm->slots_lock --> kvm->irq_lock
  */
 
-DEFINE_RAW_SPINLOCK(kvm_lock);
+DEFINE_SPINLOCK(kvm_lock);
+static DEFINE_RAW_SPINLOCK(kvm_count_lock);
 LIST_HEAD(vm_list);
 
 static cpumask_var_t cpus_hardware_enabled;
@@ -490,9 +491,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
        if (r)
                goto out_err;
 
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_add(&kvm->vm_list, &vm_list);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
 
        return kvm;
 
@@ -581,9 +582,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
        struct mm_struct *mm = kvm->mm;
 
        kvm_arch_sync_events(kvm);
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_del(&kvm->vm_list);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        kvm_free_irq_routing(kvm);
        for (i = 0; i < KVM_NR_BUSES; i++)
                kvm_io_bus_destroy(kvm->buses[i]);
@@ -2683,11 +2684,12 @@ static void hardware_enable_nolock(void *junk)
        }
 }
 
-static void hardware_enable(void *junk)
+static void hardware_enable(void)
 {
-       raw_spin_lock(&kvm_lock);
-       hardware_enable_nolock(junk);
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
+       if (kvm_usage_count)
+               hardware_enable_nolock(NULL);
+       raw_spin_unlock(&kvm_count_lock);
 }
 
 static void hardware_disable_nolock(void *junk)
@@ -2700,11 +2702,12 @@ static void hardware_disable_nolock(void *junk)
        kvm_arch_hardware_disable(NULL);
 }
 
-static void hardware_disable(void *junk)
+static void hardware_disable(void)
 {
-       raw_spin_lock(&kvm_lock);
-       hardware_disable_nolock(junk);
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
+       if (kvm_usage_count)
+               hardware_disable_nolock(NULL);
+       raw_spin_unlock(&kvm_count_lock);
 }
 
 static void hardware_disable_all_nolock(void)
@@ -2718,16 +2721,16 @@ static void hardware_disable_all_nolock(void)
 
 static void hardware_disable_all(void)
 {
-       raw_spin_lock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
        hardware_disable_all_nolock();
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_unlock(&kvm_count_lock);
 }
 
 static int hardware_enable_all(void)
 {
        int r = 0;
 
-       raw_spin_lock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
 
        kvm_usage_count++;
        if (kvm_usage_count == 1) {
@@ -2740,7 +2743,7 @@ static int hardware_enable_all(void)
                }
        }
 
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_unlock(&kvm_count_lock);
 
        return r;
 }
@@ -2750,20 +2753,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 {
        int cpu = (long)v;
 
-       if (!kvm_usage_count)
-               return NOTIFY_OK;
-
        val &= ~CPU_TASKS_FROZEN;
        switch (val) {
        case CPU_DYING:
                printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
                       cpu);
-               hardware_disable(NULL);
+               hardware_disable();
                break;
        case CPU_STARTING:
                printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
                       cpu);
-               hardware_enable(NULL);
+               hardware_enable();
                break;
        }
        return NOTIFY_OK;
@@ -3056,10 +3056,10 @@ static int vm_stat_get(void *_offset, u64 *val)
        struct kvm *kvm;
 
        *val = 0;
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
                *val += *(u32 *)((void *)kvm + offset);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        return 0;
 }
 
@@ -3073,12 +3073,12 @@ static int vcpu_stat_get(void *_offset, u64 *val)
        int i;
 
        *val = 0;
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
                kvm_for_each_vcpu(i, vcpu, kvm)
                        *val += *(u32 *)((void *)vcpu + offset);
 
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        return 0;
 }
 
@@ -3133,7 +3133,7 @@ static int kvm_suspend(void)
 static void kvm_resume(void)
 {
        if (kvm_usage_count) {
-               WARN_ON(raw_spin_is_locked(&kvm_lock));
+               WARN_ON(raw_spin_is_locked(&kvm_count_lock));
                hardware_enable_nolock(NULL);
        }
 }
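Taken together, the kvm_main.c hunks split one lock into two: kvm_lock becomes an ordinary spinlock_t protecting vm_list and the stat walkers, while the new raw kvm_count_lock guards kvm_usage_count and the hardware enable/disable paths reached from CPU-hotplug notifiers; the notifier itself no longer tests the count unlocked, since hardware_enable()/hardware_disable() now check it under the lock. A sketch of that gating, modeled with a C11 mutex in place of the raw spinlock:

    #include <stdio.h>
    #include <threads.h>

    static mtx_t count_lock;        /* stands in for kvm_count_lock */
    static int usage_count;         /* stands in for kvm_usage_count */

    static void hardware_enable_sketch(void)
    {
            mtx_lock(&count_lock);
            if (usage_count)        /* only touch hardware if VMs exist */
                    puts("VMXON on this CPU");
            mtx_unlock(&count_lock);
    }

    int main(void)
    {
            mtx_init(&count_lock, mtx_plain);

            hardware_enable_sketch();       /* no VMs yet: does nothing */

            mtx_lock(&count_lock);
            usage_count++;                  /* first VM created */
            mtx_unlock(&count_lock);

            hardware_enable_sketch();       /* now enables virtualization */
            mtx_destroy(&count_lock);
            return 0;
    }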