Merge tag 'kvm-s390-next-4.12-3' of git://git.kernel.org/pub/scm/linux/kernel/git...
author    Paolo Bonzini <pbonzini@redhat.com>
Thu, 27 Apr 2017 12:11:07 +0000 (14:11 +0200)
committer Paolo Bonzini <pbonzini@redhat.com>
Thu, 27 Apr 2017 12:11:07 +0000 (14:11 +0200)
KVM: s390: MSA8 feature for guests

- Detect all function codes for KMA and export the features
  for use in the cpu model

73 files changed:
Documentation/virtual/kvm/api.txt
Documentation/virtual/kvm/devices/vfio.txt
arch/powerpc/include/asm/disassemble.h
arch/powerpc/include/asm/iommu.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/asm/mmu_context.h
arch/powerpc/include/asm/ppc-opcode.h
arch/powerpc/kernel/iommu.c
arch/powerpc/kvm/Kconfig
arch/powerpc/kvm/book3s.c
arch/powerpc/kvm/book3s_64_mmu.c
arch/powerpc/kvm/book3s_64_mmu_host.c
arch/powerpc/kvm/book3s_64_vio.c
arch/powerpc/kvm/book3s_64_vio_hv.c
arch/powerpc/kvm/book3s_emulate.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_pr.c
arch/powerpc/kvm/booke.c
arch/powerpc/kvm/e500_mmu_host.c
arch/powerpc/kvm/emulate.c
arch/powerpc/kvm/emulate_loadstore.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/mm/mmu_context_iommu.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/powernv/pci.c
arch/powerpc/platforms/pseries/iommu.c
arch/powerpc/platforms/pseries/vio.c
arch/um/include/shared/os.h
arch/x86/entry/syscalls/syscall_32.tbl
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/proto.h
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/tlbflush.h
arch/x86/include/uapi/asm/prctl.h
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/kvm.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/ptrace.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/cpuid.h
arch/x86/kvm/emulate.c
arch/x86/kvm/i8259.c
arch/x86/kvm/ioapic.c
arch/x86/kvm/ioapic.h
arch/x86/kvm/irq.c
arch/x86/kvm/irq.h
arch/x86/kvm/irq_comm.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
arch/x86/um/Makefile
arch/x86/um/asm/ptrace.h
arch/x86/um/os-Linux/prctl.c
arch/x86/um/syscalls_32.c [new file with mode: 0644]
arch/x86/um/syscalls_64.c
drivers/vfio/vfio_iommu_spapr_tce.c
fs/exec.c
include/linux/compat.h
include/linux/kvm_host.h
include/linux/thread_info.h
include/uapi/linux/kvm.h
scripts/checksyscalls.sh
virt/kvm/eventfd.c
virt/kvm/irqchip.c
virt/kvm/kvm_main.c
virt/kvm/vfio.c

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index e60be91d8036629110cd1f63751159912b81238f..dc674c2b8b31f8476cd861ed2b3984f406647966 100644
@@ -4111,3 +4111,12 @@ reserved.
  2: MIPS64 or microMIPS64 with access to all address segments.
     Both registers and addresses are 64-bits wide.
     It will be possible to run 64-bit or 32-bit guest code.
+
+8.8 KVM_CAP_X86_GUEST_MWAIT
+
+Architectures: x86
+
+This capability indicates that a guest using memory monitoring instructions
+(MWAIT/MWAITX) to stop the virtual CPU will not cause a VM exit.  As such,
+time spent while the virtual CPU is halted in this way will then be accounted
+for as guest running time on the host (as opposed to e.g. HLT).
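As an aside, the capability can be probed from userspace along these lines (a minimal sketch, not part of the patch; it assumes a uapi <linux/kvm.h> new enough to define KVM_CAP_X86_GUEST_MWAIT):

/*
 * Hedged example: ask KVM whether guest MWAIT/MWAITX will avoid
 * VM exits on this host.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);

	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* KVM_CHECK_EXTENSION returns a positive value if the capability is present. */
	if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_X86_GUEST_MWAIT) > 0)
		printf("guest MWAIT/MWAITX will not cause a VM exit\n");
	else
		printf("guest MWAIT/MWAITX exits (capability absent)\n");

	return 0;
}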
diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt
index ef51740c67ca139153b6af059ae8c3ccfe282fd7..528c77c8022c66e846bb8a41cbacd999fcd069dc 100644
@@ -16,7 +16,21 @@ Groups:
 
 KVM_DEV_VFIO_GROUP attributes:
   KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
+       kvm_device_attr.addr points to an int32_t file descriptor
+       for the VFIO group.
   KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking
+       kvm_device_attr.addr points to an int32_t file descriptor
+       for the VFIO group.
+  KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: attaches a guest-visible TCE table
+       allocated by sPAPR KVM.
+       kvm_device_attr.addr points to a struct:
 
-For each, kvm_device_attr.addr points to an int32_t file descriptor
-for the VFIO group.
+       struct kvm_vfio_spapr_tce {
+               __s32   groupfd;
+               __s32   tablefd;
+       };
+
+       where
+       @groupfd is a file descriptor for a VFIO group;
+       @tablefd is a file descriptor for a TCE table allocated via
+               KVM_CREATE_SPAPR_TCE.
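A sketch of how userspace might use the new attribute (hedged: it assumes kvm_vfio_fd was created earlier with KVM_CREATE_DEVICE for the VFIO device, group_fd is an open VFIO group, table_fd came from KVM_CREATE_SPAPR_TCE, and the uapi headers already carry struct kvm_vfio_spapr_tce):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Attach a KVM-allocated TCE table to a VFIO group on the KVM-VFIO device. */
static int spapr_tce_attach(int kvm_vfio_fd, int group_fd, int table_fd)
{
	struct kvm_vfio_spapr_tce param = {
		.groupfd = group_fd,
		.tablefd = table_fd,
	};
	struct kvm_device_attr attr = {
		.group = KVM_DEV_VFIO_GROUP,
		.attr  = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
		.addr  = (__u64)(unsigned long)&param,
	};

	/* Returns 0 on success, -1 with errno set on failure. */
	return ioctl(kvm_vfio_fd, KVM_SET_DEVICE_ATTR, &attr);
}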
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
index 4852e849128bf88f057d9af05ff4bdce5fa6976d..c0a55050f70f416247174e6fe2c2a21315e1194a 100644
@@ -87,6 +87,11 @@ static inline unsigned int get_oc(u32 inst)
        return (inst >> 11) & 0x7fff;
 }
 
+static inline unsigned int get_tx_or_sx(u32 inst)
+{
+       return (inst) & 0x1;
+}
+
 #define IS_XFORM(inst) (get_op(inst)  == 31)
 #define IS_DSFORM(inst)        (get_op(inst) >= 56)
 
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 2c1d5079294434c679ba6dee8f8c0b56714ccc8f..8a8ce220d7d04a0c077f6a8f53401f61f24f35a4 100644
@@ -64,6 +64,11 @@ struct iommu_table_ops {
                        long index,
                        unsigned long *hpa,
                        enum dma_data_direction *direction);
+       /* Real mode */
+       int (*exchange_rm)(struct iommu_table *tbl,
+                       long index,
+                       unsigned long *hpa,
+                       enum dma_data_direction *direction);
 #endif
        void (*clear)(struct iommu_table *tbl,
                        long index, long npages);
@@ -114,6 +119,7 @@ struct iommu_table {
        struct list_head it_group_list;/* List of iommu_table_group_link */
        unsigned long *it_userspace; /* userspace view of the table */
        struct iommu_table_ops *it_ops;
+       struct kref    it_kref;
 };
 
 #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
@@ -146,8 +152,8 @@ static inline void *get_iommu_table_base(struct device *dev)
 
 extern int dma_iommu_dma_supported(struct device *dev, u64 mask);
 
-/* Frees table for an individual device node */
-extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
+extern struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl);
+extern int iommu_tce_table_put(struct iommu_table *tbl);
 
 /* Initializes an iommu_table based in values set in the passed-in
  * structure
@@ -208,6 +214,8 @@ extern void iommu_del_device(struct device *dev);
 extern int __init tce_iommu_bus_notifier_init(void);
 extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
                unsigned long *hpa, enum dma_data_direction *direction);
+extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
+               unsigned long *hpa, enum dma_data_direction *direction);
 #else
 static inline void iommu_register_group(struct iommu_table_group *table_group,
                                        int pci_domain_number,
@@ -288,11 +296,21 @@ static inline void iommu_restore(void)
 #endif
 
 /* The API to support IOMMU operations for VFIO */
-extern int iommu_tce_clear_param_check(struct iommu_table *tbl,
-               unsigned long ioba, unsigned long tce_value,
-               unsigned long npages);
-extern int iommu_tce_put_param_check(struct iommu_table *tbl,
-               unsigned long ioba, unsigned long tce);
+extern int iommu_tce_check_ioba(unsigned long page_shift,
+               unsigned long offset, unsigned long size,
+               unsigned long ioba, unsigned long npages);
+extern int iommu_tce_check_gpa(unsigned long page_shift,
+               unsigned long gpa);
+
+#define iommu_tce_clear_param_check(tbl, ioba, tce_value, npages) \
+               (iommu_tce_check_ioba((tbl)->it_page_shift,       \
+                               (tbl)->it_offset, (tbl)->it_size, \
+                               (ioba), (npages)) || (tce_value))
+#define iommu_tce_put_param_check(tbl, ioba, gpa)                 \
+               (iommu_tce_check_ioba((tbl)->it_page_shift,       \
+                               (tbl)->it_offset, (tbl)->it_size, \
+                               (ioba), 1) ||                     \
+               iommu_tce_check_gpa((tbl)->it_page_shift, (gpa)))
 
 extern void iommu_flush_tce(struct iommu_table *tbl);
 extern int iommu_take_ownership(struct iommu_table *tbl);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 01d05c76f1c7ddc5da990c5d7eaf312bc77d1017..77c60826d1456c1d63c4df46eba0a7ad14c3bab0 100644
@@ -188,6 +188,13 @@ struct kvmppc_pginfo {
        atomic_t refcnt;
 };
 
+struct kvmppc_spapr_tce_iommu_table {
+       struct rcu_head rcu;
+       struct list_head next;
+       struct iommu_table *tbl;
+       struct kref kref;
+};
+
 struct kvmppc_spapr_tce_table {
        struct list_head list;
        struct kvm *kvm;
@@ -196,6 +203,7 @@ struct kvmppc_spapr_tce_table {
        u32 page_shift;
        u64 offset;             /* in pages */
        u64 size;               /* window size in pages */
+       struct list_head iommu_tables;
        struct page *pages[0];
 };
 
@@ -342,6 +350,7 @@ struct kvmppc_pte {
        bool may_read           : 1;
        bool may_write          : 1;
        bool may_execute        : 1;
+       unsigned long wimg;
        u8 page_size;           /* MMU_PAGE_xxx */
 };
 
@@ -438,6 +447,11 @@ struct mmio_hpte_cache {
        unsigned int index;
 };
 
+#define KVMPPC_VSX_COPY_NONE           0
+#define KVMPPC_VSX_COPY_WORD           1
+#define KVMPPC_VSX_COPY_DWORD          2
+#define KVMPPC_VSX_COPY_DWORD_LOAD_DUMP        3
+
 struct openpic;
 
 struct kvm_vcpu_arch {
@@ -641,6 +655,21 @@ struct kvm_vcpu_arch {
        u8 io_gpr; /* GPR used as IO source/target */
        u8 mmio_host_swabbed;
        u8 mmio_sign_extend;
+       /* conversion between single and double precision */
+       u8 mmio_sp64_extend;
+       /*
+        * Number of simulations for vsx.
+        * If we use 2*8bytes to simulate 1*16bytes,
+        * then the number should be 2 and
+        * mmio_vsx_copy_type=KVMPPC_VSX_COPY_DWORD.
+        * If we use 4*4bytes to simulate 1*16bytes,
+        * the number should be 4 and
+        * mmio_vsx_copy_type=KVMPPC_VSX_COPY_WORD.
+        */
+       u8 mmio_vsx_copy_nums;
+       u8 mmio_vsx_offset;
+       u8 mmio_vsx_copy_type;
+       u8 mmio_vsx_tx_sx_enabled;
        u8 osi_needed;
        u8 osi_enabled;
        u8 papr_enabled;
@@ -729,6 +758,8 @@ struct kvm_vcpu_arch {
 };
 
 #define VCPU_FPR(vcpu, i)      (vcpu)->arch.fp.fpr[i][TS_FPROFFSET]
+#define VCPU_VSX_FPR(vcpu, i, j)       ((vcpu)->arch.fp.fpr[i][j])
+#define VCPU_VSX_VR(vcpu, i)           ((vcpu)->arch.vr.vr[i])
 
 /* Values for vcpu->arch.state */
 #define KVMPPC_VCPU_NOTREADY           0
@@ -742,6 +773,7 @@ struct kvm_vcpu_arch {
 #define KVM_MMIO_REG_FPR       0x0020
 #define KVM_MMIO_REG_QPR       0x0040
 #define KVM_MMIO_REG_FQPR      0x0060
+#define KVM_MMIO_REG_VSX       0x0080
 
 #define __KVM_HAVE_ARCH_WQP
 #define __KVM_HAVE_CREATE_DEVICE
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index dd11c4c8c56a0cc93bef66c5fe8791bcfbb9699a..5885d327c025e33efb3697dbddd8ecdb7c22423b 100644
@@ -78,9 +78,15 @@ extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
                                unsigned int rt, unsigned int bytes,
                               int is_default_endian);
+extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                               unsigned int rt, unsigned int bytes,
+                       int is_default_endian, int mmio_sign_extend);
 extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
                               u64 val, unsigned int bytes,
                               int is_default_endian);
+extern int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                               int rs, unsigned int bytes,
+                               int is_default_endian);
 
 extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
                                 enum instruction_type type, u32 *inst);
@@ -132,6 +138,9 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
+extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
@@ -164,13 +173,19 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
 extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
                        struct kvm_memory_slot *memslot, unsigned long porder);
 extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
+extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
+               struct iommu_group *grp);
+extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
+               struct iommu_group *grp);
 
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
                                struct kvm_create_spapr_tce_64 *args);
 extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
-               struct kvm_vcpu *vcpu, unsigned long liobn);
-extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
-               unsigned long ioba, unsigned long npages);
+               struct kvm *kvm, unsigned long liobn);
+#define kvmppc_ioba_validate(stt, ioba, npages)                         \
+               (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
+                               (stt)->size, (ioba), (npages)) ?        \
+                               H_PARAMETER : H_SUCCESS)
 extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt,
                unsigned long tce);
 extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
@@ -240,6 +255,7 @@ union kvmppc_one_reg {
        u64     dval;
        vector128 vval;
        u64     vsxval[2];
+       u32     vsx32val[4];
        struct {
                u64     addr;
                u64     length;
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index b9e3f0aca261da2233a086cb41150bc3afacb9dc..c70c8272523d0ec3d76381c9f2d66275e4f33c25 100644
@@ -29,10 +29,14 @@ extern void mm_iommu_init(struct mm_struct *mm);
 extern void mm_iommu_cleanup(struct mm_struct *mm);
 extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
                unsigned long ua, unsigned long size);
+extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
+               struct mm_struct *mm, unsigned long ua, unsigned long size);
 extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
                unsigned long ua, unsigned long entries);
 extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
                unsigned long ua, unsigned long *hpa);
+extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
+               unsigned long ua, unsigned long *hpa);
 extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
 #endif
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index e7d6d86563eeda924598b1079a491d0e5945c566..73f06f4dddc78a44725a59a150b42b270b312704 100644
 #define OP_TRAP_64 2
 
 #define OP_31_XOP_TRAP      4
+#define OP_31_XOP_LDX       21
 #define OP_31_XOP_LWZX      23
+#define OP_31_XOP_LDUX      53
 #define OP_31_XOP_DCBST     54
 #define OP_31_XOP_LWZUX     55
 #define OP_31_XOP_TRAP_64   68
 #define OP_31_XOP_DCBF      86
 #define OP_31_XOP_LBZX      87
+#define OP_31_XOP_STDX      149
 #define OP_31_XOP_STWX      151
+#define OP_31_XOP_STDUX     181
+#define OP_31_XOP_STWUX     183
 #define OP_31_XOP_STBX      215
 #define OP_31_XOP_LBZUX     119
 #define OP_31_XOP_STBUX     247
 #define OP_31_XOP_LHZX      279
 #define OP_31_XOP_LHZUX     311
 #define OP_31_XOP_MFSPR     339
+#define OP_31_XOP_LWAX      341
 #define OP_31_XOP_LHAX      343
+#define OP_31_XOP_LWAUX     373
 #define OP_31_XOP_LHAUX     375
 #define OP_31_XOP_STHX      407
 #define OP_31_XOP_STHUX     439
 #define OP_31_XOP_MTSPR     467
 #define OP_31_XOP_DCBI      470
+#define OP_31_XOP_LDBRX     532
 #define OP_31_XOP_LWBRX     534
 #define OP_31_XOP_TLBSYNC   566
+#define OP_31_XOP_STDBRX    660
 #define OP_31_XOP_STWBRX    662
+#define OP_31_XOP_STFSX            663
+#define OP_31_XOP_STFSUX    695
+#define OP_31_XOP_STFDX     727
+#define OP_31_XOP_STFDUX    759
 #define OP_31_XOP_LHBRX     790
+#define OP_31_XOP_LFIWAX    855
+#define OP_31_XOP_LFIWZX    887
 #define OP_31_XOP_STHBRX    918
+#define OP_31_XOP_STFIWX    983
+
+/* VSX Scalar Load Instructions */
+#define OP_31_XOP_LXSDX         588
+#define OP_31_XOP_LXSSPX        524
+#define OP_31_XOP_LXSIWAX       76
+#define OP_31_XOP_LXSIWZX       12
+
+/* VSX Scalar Store Instructions */
+#define OP_31_XOP_STXSDX        716
+#define OP_31_XOP_STXSSPX       652
+#define OP_31_XOP_STXSIWX       140
+
+/* VSX Vector Load Instructions */
+#define OP_31_XOP_LXVD2X        844
+#define OP_31_XOP_LXVW4X        780
+
+/* VSX Vector Load and Splat Instruction */
+#define OP_31_XOP_LXVDSX        332
+
+/* VSX Vector Store Instructions */
+#define OP_31_XOP_STXVD2X       972
+#define OP_31_XOP_STXVW4X       908
+
+#define OP_31_XOP_LFSX          535
+#define OP_31_XOP_LFSUX         567
+#define OP_31_XOP_LFDX          599
+#define OP_31_XOP_LFDUX                631
 
 #define OP_LWZ  32
+#define OP_STFS 52
+#define OP_STFSU 53
+#define OP_STFD 54
+#define OP_STFDU 55
 #define OP_LD   58
 #define OP_LWZU 33
 #define OP_LBZ  34
 #define OP_LHAU 43
 #define OP_STH  44
 #define OP_STHU 45
+#define OP_LMW  46
+#define OP_STMW 47
+#define OP_LFS  48
+#define OP_LFSU 49
+#define OP_LFD  50
+#define OP_LFDU 51
+#define OP_STFS 52
+#define OP_STFSU 53
+#define OP_STFD  54
+#define OP_STFDU 55
+#define OP_LQ    56
 
 /* sorted alphabetically */
 #define PPC_INST_BHRBE                 0x7c00025c
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 5f202a566ec5f0296a22a71ac238a63a6a90ba7f..f2b724cd9e64d8629554052183511abb8bfb4719 100644
@@ -711,13 +711,16 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
        return tbl;
 }
 
-void iommu_free_table(struct iommu_table *tbl, const char *node_name)
+static void iommu_table_free(struct kref *kref)
 {
        unsigned long bitmap_sz;
        unsigned int order;
+       struct iommu_table *tbl;
 
-       if (!tbl)
-               return;
+       tbl = container_of(kref, struct iommu_table, it_kref);
+
+       if (tbl->it_ops->free)
+               tbl->it_ops->free(tbl);
 
        if (!tbl->it_map) {
                kfree(tbl);
@@ -733,7 +736,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
 
        /* verify that table contains no entries */
        if (!bitmap_empty(tbl->it_map, tbl->it_size))
-               pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
+               pr_warn("%s: Unexpected TCEs\n", __func__);
 
        /* calculate bitmap size in bytes */
        bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
@@ -746,6 +749,24 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
        kfree(tbl);
 }
 
+struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl)
+{
+       if (kref_get_unless_zero(&tbl->it_kref))
+               return tbl;
+
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_get);
+
+int iommu_tce_table_put(struct iommu_table *tbl)
+{
+       if (WARN_ON(!tbl))
+               return 0;
+
+       return kref_put(&tbl->it_kref, iommu_table_free);
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_put);
+
 /* Creates TCEs for a user provided buffer.  The user buffer must be
  * contiguous real kernel storage (not vmalloc).  The address passed here
  * comprises a page address and offset into that page. The dma_addr_t
@@ -942,47 +963,36 @@ void iommu_flush_tce(struct iommu_table *tbl)
 }
 EXPORT_SYMBOL_GPL(iommu_flush_tce);
 
-int iommu_tce_clear_param_check(struct iommu_table *tbl,
-               unsigned long ioba, unsigned long tce_value,
-               unsigned long npages)
+int iommu_tce_check_ioba(unsigned long page_shift,
+               unsigned long offset, unsigned long size,
+               unsigned long ioba, unsigned long npages)
 {
-       /* tbl->it_ops->clear() does not support any value but 0 */
-       if (tce_value)
-               return -EINVAL;
+       unsigned long mask = (1UL << page_shift) - 1;
 
-       if (ioba & ~IOMMU_PAGE_MASK(tbl))
+       if (ioba & mask)
                return -EINVAL;
 
-       ioba >>= tbl->it_page_shift;
-       if (ioba < tbl->it_offset)
+       ioba >>= page_shift;
+       if (ioba < offset)
                return -EINVAL;
 
-       if ((ioba + npages) > (tbl->it_offset + tbl->it_size))
+       if ((ioba + 1) > (offset + size))
                return -EINVAL;
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
+EXPORT_SYMBOL_GPL(iommu_tce_check_ioba);
 
-int iommu_tce_put_param_check(struct iommu_table *tbl,
-               unsigned long ioba, unsigned long tce)
+int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
 {
-       if (tce & ~IOMMU_PAGE_MASK(tbl))
-               return -EINVAL;
-
-       if (ioba & ~IOMMU_PAGE_MASK(tbl))
-               return -EINVAL;
+       unsigned long mask = (1UL << page_shift) - 1;
 
-       ioba >>= tbl->it_page_shift;
-       if (ioba < tbl->it_offset)
-               return -EINVAL;
-
-       if ((ioba + 1) > (tbl->it_offset + tbl->it_size))
+       if (gpa & mask)
                return -EINVAL;
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
+EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
 
 long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
                unsigned long *hpa, enum dma_data_direction *direction)
@@ -1004,6 +1014,31 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
 }
 EXPORT_SYMBOL_GPL(iommu_tce_xchg);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
+               unsigned long *hpa, enum dma_data_direction *direction)
+{
+       long ret;
+
+       ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+
+       if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+                       (*direction == DMA_BIDIRECTIONAL))) {
+               struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
+
+               if (likely(pg)) {
+                       SetPageDirty(pg);
+               } else {
+                       tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+                       ret = -EFAULT;
+               }
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
+#endif
+
 int iommu_take_ownership(struct iommu_table *tbl)
 {
        unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 029be26b5a17edc01b5486bde83baea9a2b3765f..65a471de96de2bfe7b743b497ec396f008849826 100644
@@ -67,6 +67,7 @@ config KVM_BOOK3S_64
        select KVM_BOOK3S_64_HANDLER
        select KVM
        select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
+       select SPAPR_TCE_IOMMU if IOMMU_SUPPORT
        ---help---
          Support running unmodified book3s_64 and book3s_32 guest kernels
          in virtual machines on book3s_64 host processors.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index b6b5c185bd92f85753924f1a476effd0916e39b4..0ff0d07c07575cf4bd5430fa6e986067e83f77c8 100644
@@ -197,6 +197,24 @@ void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
 }
 EXPORT_SYMBOL_GPL(kvmppc_core_queue_program);
 
+void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu)
+{
+       /* might as well deliver this straight away */
+       kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, 0);
+}
+
+void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu)
+{
+       /* might as well deliver this straight away */
+       kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_ALTIVEC, 0);
+}
+
+void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu)
+{
+       /* might as well deliver this straight away */
+       kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_VSX, 0);
+}
+
 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
 {
        kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 70153578131a047593185152b4be4bd4276d99d1..29ebe2fd58674c59f27803a5426e4d2414f39418 100644
@@ -319,6 +319,7 @@ do_second:
                gpte->may_execute = true;
        gpte->may_read = false;
        gpte->may_write = false;
+       gpte->wimg = r & HPTE_R_WIMG;
 
        switch (pp) {
        case 0:
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index a587e8f4fd2648caf61535176ef5a1d03a2c79ae..145a61892c48817e9d19061564bf90778c2b3c4a 100644
@@ -145,6 +145,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
        else
                kvmppc_mmu_flush_icache(pfn);
 
+       rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg;
+
        /*
         * Use 64K pages if possible; otherwise, on 64K page kernels,
         * we need to transfer 4 more bits from guest real to host real addr.
@@ -177,12 +179,15 @@ map_again:
        ret = mmu_hash_ops.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
                                       hpsize, hpsize, MMU_SEGSIZE_256M);
 
-       if (ret < 0) {
+       if (ret == -1) {
                /* If we couldn't map a primary PTE, try a secondary */
                hash = ~hash;
                vflags ^= HPTE_V_SECONDARY;
                attempt++;
                goto map_again;
+       } else if (ret < 0) {
+               r = -EIO;
+               goto out_unlock;
        } else {
                trace_kvm_book3s_64_mmu_map(rflags, hpteg,
                                            vpn, hpaddr, orig_pte);
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 3e26cd4979f9365c4a0575df139e900f4203ff44..a160c14304eba22bd83ee98b8f052a2ed8bf7a3f 100644
@@ -28,6 +28,8 @@
 #include <linux/hugetlb.h>
 #include <linux/list.h>
 #include <linux/anon_inodes.h>
+#include <linux/iommu.h>
+#include <linux/file.h>
 
 #include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
@@ -40,6 +42,7 @@
 #include <asm/udbg.h>
 #include <asm/iommu.h>
 #include <asm/tce.h>
+#include <asm/mmu_context.h>
 
 static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
 {
@@ -91,6 +94,137 @@ static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
        return ret;
 }
 
+static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
+{
+       struct kvmppc_spapr_tce_iommu_table *stit = container_of(head,
+                       struct kvmppc_spapr_tce_iommu_table, rcu);
+
+       iommu_tce_table_put(stit->tbl);
+
+       kfree(stit);
+}
+
+static void kvm_spapr_tce_liobn_put(struct kref *kref)
+{
+       struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref,
+                       struct kvmppc_spapr_tce_iommu_table, kref);
+
+       list_del_rcu(&stit->next);
+
+       call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free);
+}
+
+extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
+               struct iommu_group *grp)
+{
+       int i;
+       struct kvmppc_spapr_tce_table *stt;
+       struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
+       struct iommu_table_group *table_group = NULL;
+
+       list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
+
+               table_group = iommu_group_get_iommudata(grp);
+               if (WARN_ON(!table_group))
+                       continue;
+
+               list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
+                       for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+                               if (table_group->tables[i] != stit->tbl)
+                                       continue;
+
+                               kref_put(&stit->kref, kvm_spapr_tce_liobn_put);
+                               return;
+                       }
+               }
+       }
+}
+
+extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
+               struct iommu_group *grp)
+{
+       struct kvmppc_spapr_tce_table *stt = NULL;
+       bool found = false;
+       struct iommu_table *tbl = NULL;
+       struct iommu_table_group *table_group;
+       long i;
+       struct kvmppc_spapr_tce_iommu_table *stit;
+       struct fd f;
+
+       f = fdget(tablefd);
+       if (!f.file)
+               return -EBADF;
+
+       list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
+               if (stt == f.file->private_data) {
+                       found = true;
+                       break;
+               }
+       }
+
+       fdput(f);
+
+       if (!found)
+               return -EINVAL;
+
+       table_group = iommu_group_get_iommudata(grp);
+       if (WARN_ON(!table_group))
+               return -EFAULT;
+
+       for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+               struct iommu_table *tbltmp = table_group->tables[i];
+
+               if (!tbltmp)
+                       continue;
+               /*
+                * Make sure hardware table parameters are exactly the same;
+                * this is used in the TCE handlers where boundary checks
+                * use only the first attached table.
+                */
+               if ((tbltmp->it_page_shift == stt->page_shift) &&
+                               (tbltmp->it_offset == stt->offset) &&
+                               (tbltmp->it_size == stt->size)) {
+                       /*
+                        * Reference the table to avoid races with
+                        * add/remove DMA windows.
+                        */
+                       tbl = iommu_tce_table_get(tbltmp);
+                       break;
+               }
+       }
+       if (!tbl)
+               return -EINVAL;
+
+       list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
+               if (tbl != stit->tbl)
+                       continue;
+
+               if (!kref_get_unless_zero(&stit->kref)) {
+                       /* stit is being destroyed */
+                       iommu_tce_table_put(tbl);
+                       return -ENOTTY;
+               }
+               /*
+                * The table is already known to this KVM, we just increased
+                * its KVM reference counter and can return.
+                */
+               return 0;
+       }
+
+       stit = kzalloc(sizeof(*stit), GFP_KERNEL);
+       if (!stit) {
+               iommu_tce_table_put(tbl);
+               return -ENOMEM;
+       }
+
+       stit->tbl = tbl;
+       kref_init(&stit->kref);
+
+       list_add_rcu(&stit->next, &stt->iommu_tables);
+
+       return 0;
+}
+
 static void release_spapr_tce_table(struct rcu_head *head)
 {
        struct kvmppc_spapr_tce_table *stt = container_of(head,
@@ -130,9 +264,18 @@ static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
 static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
 {
        struct kvmppc_spapr_tce_table *stt = filp->private_data;
+       struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
 
        list_del_rcu(&stt->list);
 
+       list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
+               WARN_ON(!kref_read(&stit->kref));
+               while (1) {
+                       if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put))
+                               break;
+               }
+       }
+
        kvm_put_kvm(stt->kvm);
 
        kvmppc_account_memlimit(
@@ -164,7 +307,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
                        return -EBUSY;
        }
 
-       size = args->size;
+       size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
        npages = kvmppc_tce_pages(size);
        ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
        if (ret) {
@@ -183,6 +326,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
        stt->offset = args->offset;
        stt->size = size;
        stt->kvm = kvm;
+       INIT_LIST_HEAD_RCU(&stt->iommu_tables);
 
        for (i = 0; i < npages; i++) {
                stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -211,15 +355,106 @@ fail:
        return ret;
 }
 
+static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
+{
+       unsigned long hpa = 0;
+       enum dma_data_direction dir = DMA_NONE;
+
+       iommu_tce_xchg(tbl, entry, &hpa, &dir);
+}
+
+static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
+               struct iommu_table *tbl, unsigned long entry)
+{
+       struct mm_iommu_table_group_mem_t *mem = NULL;
+       const unsigned long pgsize = 1ULL << tbl->it_page_shift;
+       unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+
+       if (!pua)
+               /* it_userspace allocation might be delayed */
+               return H_TOO_HARD;
+
+       mem = mm_iommu_lookup(kvm->mm, *pua, pgsize);
+       if (!mem)
+               return H_TOO_HARD;
+
+       mm_iommu_mapped_dec(mem);
+
+       *pua = 0;
+
+       return H_SUCCESS;
+}
+
+static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
+               struct iommu_table *tbl, unsigned long entry)
+{
+       enum dma_data_direction dir = DMA_NONE;
+       unsigned long hpa = 0;
+       long ret;
+
+       if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
+               return H_HARDWARE;
+
+       if (dir == DMA_NONE)
+               return H_SUCCESS;
+
+       ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
+       if (ret != H_SUCCESS)
+               iommu_tce_xchg(tbl, entry, &hpa, &dir);
+
+       return ret;
+}
+
+long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
+               unsigned long entry, unsigned long ua,
+               enum dma_data_direction dir)
+{
+       long ret;
+       unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+       struct mm_iommu_table_group_mem_t *mem;
+
+       if (!pua)
+               /* it_userspace allocation might be delayed */
+               return H_TOO_HARD;
+
+       mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift);
+       if (!mem)
+               /* This only handles v2 IOMMU type, v1 is handled via ioctl() */
+               return H_TOO_HARD;
+
+       if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa)))
+               return H_HARDWARE;
+
+       if (mm_iommu_mapped_inc(mem))
+               return H_CLOSED;
+
+       ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
+       if (WARN_ON_ONCE(ret)) {
+               mm_iommu_mapped_dec(mem);
+               return H_HARDWARE;
+       }
+
+       if (dir != DMA_NONE)
+               kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
+
+       *pua = ua;
+
+       return 0;
+}
+
 long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
                      unsigned long ioba, unsigned long tce)
 {
-       struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
-       long ret;
+       struct kvmppc_spapr_tce_table *stt;
+       long ret, idx;
+       struct kvmppc_spapr_tce_iommu_table *stit;
+       unsigned long entry, ua = 0;
+       enum dma_data_direction dir;
 
        /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
        /*          liobn, ioba, tce); */
 
+       stt = kvmppc_find_table(vcpu->kvm, liobn);
        if (!stt)
                return H_TOO_HARD;
 
@@ -231,7 +466,35 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
        if (ret != H_SUCCESS)
                return ret;
 
-       kvmppc_tce_put(stt, ioba >> stt->page_shift, tce);
+       dir = iommu_tce_direction(tce);
+       if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
+                       tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
+               return H_PARAMETER;
+
+       entry = ioba >> stt->page_shift;
+
+       list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+               if (dir == DMA_NONE) {
+                       ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
+                                       stit->tbl, entry);
+               } else {
+                       idx = srcu_read_lock(&vcpu->kvm->srcu);
+                       ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl,
+                                       entry, ua, dir);
+                       srcu_read_unlock(&vcpu->kvm->srcu, idx);
+               }
+
+               if (ret == H_SUCCESS)
+                       continue;
+
+               if (ret == H_TOO_HARD)
+                       return ret;
+
+               WARN_ON_ONCE(1);
+               kvmppc_clear_tce(stit->tbl, entry);
+       }
+
+       kvmppc_tce_put(stt, entry, tce);
 
        return H_SUCCESS;
 }
@@ -246,8 +509,9 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
        unsigned long entry, ua = 0;
        u64 __user *tces;
        u64 tce;
+       struct kvmppc_spapr_tce_iommu_table *stit;
 
-       stt = kvmppc_find_table(vcpu, liobn);
+       stt = kvmppc_find_table(vcpu->kvm, liobn);
        if (!stt)
                return H_TOO_HARD;
 
@@ -284,6 +548,26 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
                if (ret != H_SUCCESS)
                        goto unlock_exit;
 
+               if (kvmppc_gpa_to_ua(vcpu->kvm,
+                               tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
+                               &ua, NULL))
+                       return H_PARAMETER;
+
+               list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+                       ret = kvmppc_tce_iommu_map(vcpu->kvm,
+                                       stit->tbl, entry + i, ua,
+                                       iommu_tce_direction(tce));
+
+                       if (ret == H_SUCCESS)
+                               continue;
+
+                       if (ret == H_TOO_HARD)
+                               goto unlock_exit;
+
+                       WARN_ON_ONCE(1);
+                       kvmppc_clear_tce(stit->tbl, entry);
+               }
+
                kvmppc_tce_put(stt, entry + i, tce);
        }
 
@@ -300,8 +584,9 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
 {
        struct kvmppc_spapr_tce_table *stt;
        long i, ret;
+       struct kvmppc_spapr_tce_iommu_table *stit;
 
-       stt = kvmppc_find_table(vcpu, liobn);
+       stt = kvmppc_find_table(vcpu->kvm, liobn);
        if (!stt)
                return H_TOO_HARD;
 
@@ -313,6 +598,24 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
        if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
                return H_PARAMETER;
 
+       list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+               unsigned long entry = ioba >> stit->tbl->it_page_shift;
+
+               for (i = 0; i < npages; ++i) {
+                       ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
+                                       stit->tbl, entry + i);
+
+                       if (ret == H_SUCCESS)
+                               continue;
+
+                       if (ret == H_TOO_HARD)
+                               return ret;
+
+                       WARN_ON_ONCE(1);
+                       kvmppc_clear_tce(stit->tbl, entry);
+               }
+       }
+
        for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
                kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
 
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index e4c4ea973e57892ccae032eba7218d107fee6cd8..eda0a8f6fae8eba29b5b04c28fe49e7f9d605d45 100644
 #include <asm/iommu.h>
 #include <asm/tce.h>
 
+#ifdef CONFIG_BUG
+
+#define WARN_ON_ONCE_RM(condition)     ({                      \
+       static bool __section(.data.unlikely) __warned;         \
+       int __ret_warn_once = !!(condition);                    \
+                                                               \
+       if (unlikely(__ret_warn_once && !__warned)) {           \
+               __warned = true;                                \
+               pr_err("WARN_ON_ONCE_RM: (%s) at %s:%u\n",      \
+                               __stringify(condition),         \
+                               __func__, __LINE__);            \
+               dump_stack();                                   \
+       }                                                       \
+       unlikely(__ret_warn_once);                              \
+})
+
+#else
+
+#define WARN_ON_ONCE_RM(condition) ({                          \
+       int __ret_warn_on = !!(condition);                      \
+       unlikely(__ret_warn_on);                                \
+})
+
+#endif
+
 #define TCES_PER_PAGE  (PAGE_SIZE / sizeof(u64))
 
 /*
  * WARNING: This will be called in real or virtual mode on HV KVM and virtual
  *          mode on PR KVM
  */
-struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
+struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
                unsigned long liobn)
 {
-       struct kvm *kvm = vcpu->kvm;
        struct kvmppc_spapr_tce_table *stt;
 
        list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list)
@@ -62,27 +86,6 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
 }
 EXPORT_SYMBOL_GPL(kvmppc_find_table);
 
-/*
- * Validates IO address.
- *
- * WARNING: This will be called in real-mode on HV KVM and virtual
- *          mode on PR KVM
- */
-long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
-               unsigned long ioba, unsigned long npages)
-{
-       unsigned long mask = (1ULL << stt->page_shift) - 1;
-       unsigned long idx = ioba >> stt->page_shift;
-
-       if ((ioba & mask) || (idx < stt->offset) ||
-                       (idx - stt->offset + npages > stt->size) ||
-                       (idx + npages < idx))
-               return H_PARAMETER;
-
-       return H_SUCCESS;
-}
-EXPORT_SYMBOL_GPL(kvmppc_ioba_validate);
-
 /*
  * Validates TCE address.
  * At the moment flags and page mask are validated.
@@ -96,10 +99,14 @@ EXPORT_SYMBOL_GPL(kvmppc_ioba_validate);
  */
 long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
 {
-       unsigned long page_mask = ~((1ULL << stt->page_shift) - 1);
-       unsigned long mask = ~(page_mask | TCE_PCI_WRITE | TCE_PCI_READ);
+       unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+       enum dma_data_direction dir = iommu_tce_direction(tce);
+
+       /* Allow userspace to poison TCE table */
+       if (dir == DMA_NONE)
+               return H_SUCCESS;
 
-       if (tce & mask)
+       if (iommu_tce_check_gpa(stt->page_shift, gpa))
                return H_PARAMETER;
 
        return H_SUCCESS;
@@ -179,15 +186,122 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
 EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry)
+{
+       unsigned long hpa = 0;
+       enum dma_data_direction dir = DMA_NONE;
+
+       iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+}
+
+static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
+               struct iommu_table *tbl, unsigned long entry)
+{
+       struct mm_iommu_table_group_mem_t *mem = NULL;
+       const unsigned long pgsize = 1ULL << tbl->it_page_shift;
+       unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+
+       if (!pua)
+               /* it_userspace allocation might be delayed */
+               return H_TOO_HARD;
+
+       pua = (void *) vmalloc_to_phys(pua);
+       if (WARN_ON_ONCE_RM(!pua))
+               return H_HARDWARE;
+
+       mem = mm_iommu_lookup_rm(kvm->mm, *pua, pgsize);
+       if (!mem)
+               return H_TOO_HARD;
+
+       mm_iommu_mapped_dec(mem);
+
+       *pua = 0;
+
+       return H_SUCCESS;
+}
+
+static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
+               struct iommu_table *tbl, unsigned long entry)
+{
+       enum dma_data_direction dir = DMA_NONE;
+       unsigned long hpa = 0;
+       long ret;
+
+       if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir))
+               /*
+                * real mode xchg can fail if struct page crosses
+                * a page boundary
+                */
+               return H_TOO_HARD;
+
+       if (dir == DMA_NONE)
+               return H_SUCCESS;
+
+       ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
+       if (ret)
+               iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+
+       return ret;
+}
+
+static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
+               unsigned long entry, unsigned long ua,
+               enum dma_data_direction dir)
+{
+       long ret;
+       unsigned long hpa = 0;
+       unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+       struct mm_iommu_table_group_mem_t *mem;
+
+       if (!pua)
+               /* it_userspace allocation might be delayed */
+               return H_TOO_HARD;
+
+       mem = mm_iommu_lookup_rm(kvm->mm, ua, 1ULL << tbl->it_page_shift);
+       if (!mem)
+               return H_TOO_HARD;
+
+       if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa)))
+               return H_HARDWARE;
+
+       pua = (void *) vmalloc_to_phys(pua);
+       if (WARN_ON_ONCE_RM(!pua))
+               return H_HARDWARE;
+
+       if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
+               return H_CLOSED;
+
+       ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+       if (ret) {
+               mm_iommu_mapped_dec(mem);
+               /*
+                * real mode xchg can fail if struct page crosses
+                * a page boundary
+                */
+               return H_TOO_HARD;
+       }
+
+       if (dir != DMA_NONE)
+               kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
+
+       *pua = ua;
+
+       return 0;
+}
+
 long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
                unsigned long ioba, unsigned long tce)
 {
-       struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
+       struct kvmppc_spapr_tce_table *stt;
        long ret;
+       struct kvmppc_spapr_tce_iommu_table *stit;
+       unsigned long entry, ua = 0;
+       enum dma_data_direction dir;
 
        /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
        /*          liobn, ioba, tce); */
 
+       stt = kvmppc_find_table(vcpu->kvm, liobn);
        if (!stt)
                return H_TOO_HARD;
 
@@ -199,7 +313,32 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
        if (ret != H_SUCCESS)
                return ret;
 
-       kvmppc_tce_put(stt, ioba >> stt->page_shift, tce);
+       dir = iommu_tce_direction(tce);
+       if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
+                       tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
+               return H_PARAMETER;
+
+       entry = ioba >> stt->page_shift;
+
+       list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+               if (dir == DMA_NONE)
+                       ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
+                                       stit->tbl, entry);
+               else
+                       ret = kvmppc_rm_tce_iommu_map(vcpu->kvm,
+                                       stit->tbl, entry, ua, dir);
+
+               if (ret == H_SUCCESS)
+                       continue;
+
+               if (ret == H_TOO_HARD)
+                       return ret;
+
+               WARN_ON_ONCE_RM(1);
+               kvmppc_rm_clear_tce(stit->tbl, entry);
+       }
+
+       kvmppc_tce_put(stt, entry, tce);
 
        return H_SUCCESS;
 }
@@ -239,8 +378,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
        long i, ret = H_SUCCESS;
        unsigned long tces, entry, ua = 0;
        unsigned long *rmap = NULL;
+       bool prereg = false;
+       struct kvmppc_spapr_tce_iommu_table *stit;
 
-       stt = kvmppc_find_table(vcpu, liobn);
+       stt = kvmppc_find_table(vcpu->kvm, liobn);
        if (!stt)
                return H_TOO_HARD;
 
@@ -259,23 +400,49 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
        if (ret != H_SUCCESS)
                return ret;
 
-       if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
-               return H_TOO_HARD;
+       if (mm_iommu_preregistered(vcpu->kvm->mm)) {
+               /*
+                * We get here if guest memory was pre-registered which
+                * is normally VFIO case and gpa->hpa translation does not
+                * depend on hpt.
+                */
+               struct mm_iommu_table_group_mem_t *mem;
 
-       rmap = (void *) vmalloc_to_phys(rmap);
+               if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL))
+                       return H_TOO_HARD;
 
-       /*
-        * Synchronize with the MMU notifier callbacks in
-        * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
-        * While we have the rmap lock, code running on other CPUs
-        * cannot finish unmapping the host real page that backs
-        * this guest real page, so we are OK to access the host
-        * real page.
-        */
-       lock_rmap(rmap);
-       if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
-               ret = H_TOO_HARD;
-               goto unlock_exit;
+               mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
+               if (mem)
+                       prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0;
+       }
+
+       if (!prereg) {
+               /*
+                * This is usually a case of a guest with emulated devices only
+                * when TCE list is not in preregistered memory.
+                * We do not require memory to be preregistered in this case
+                * so lock rmap and do __find_linux_pte_or_hugepte().
+                */
+               if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
+                       return H_TOO_HARD;
+
+               rmap = (void *) vmalloc_to_phys(rmap);
+               if (WARN_ON_ONCE_RM(!rmap))
+                       return H_HARDWARE;
+
+               /*
+                * Synchronize with the MMU notifier callbacks in
+                * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
+                * While we have the rmap lock, code running on other CPUs
+                * cannot finish unmapping the host real page that backs
+                * this guest real page, so we are OK to access the host
+                * real page.
+                */
+               lock_rmap(rmap);
+               if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
+                       ret = H_TOO_HARD;
+                       goto unlock_exit;
+               }
        }
 
        for (i = 0; i < npages; ++i) {
@@ -285,11 +452,33 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
                if (ret != H_SUCCESS)
                        goto unlock_exit;
 
+               ua = 0;
+               if (kvmppc_gpa_to_ua(vcpu->kvm,
+                               tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
+                               &ua, NULL))
+                       return H_PARAMETER;
+
+               list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+                       ret = kvmppc_rm_tce_iommu_map(vcpu->kvm,
+                                       stit->tbl, entry + i, ua,
+                                       iommu_tce_direction(tce));
+
+                       if (ret == H_SUCCESS)
+                               continue;
+
+                       if (ret == H_TOO_HARD)
+                               goto unlock_exit;
+
+                       WARN_ON_ONCE_RM(1);
+                       kvmppc_rm_clear_tce(stit->tbl, entry);
+               }
+
                kvmppc_tce_put(stt, entry + i, tce);
        }
 
 unlock_exit:
-       unlock_rmap(rmap);
+       if (rmap)
+               unlock_rmap(rmap);
 
        return ret;
 }
@@ -300,8 +489,9 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 {
        struct kvmppc_spapr_tce_table *stt;
        long i, ret;
+       struct kvmppc_spapr_tce_iommu_table *stit;
 
-       stt = kvmppc_find_table(vcpu, liobn);
+       stt = kvmppc_find_table(vcpu->kvm, liobn);
        if (!stt)
                return H_TOO_HARD;
 
@@ -313,6 +503,24 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
        if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
                return H_PARAMETER;
 
+       list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+               unsigned long entry = ioba >> stit->tbl->it_page_shift;
+
+               for (i = 0; i < npages; ++i) {
+                       ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
+                                       stit->tbl, entry + i);
+
+                       if (ret == H_SUCCESS)
+                               continue;
+
+                       if (ret == H_TOO_HARD)
+                               return ret;
+
+                       WARN_ON_ONCE_RM(1);
+                       kvmppc_rm_clear_tce(stit->tbl, entry);
+               }
+       }
+
        for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
                kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
 
@@ -322,12 +530,13 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
                      unsigned long ioba)
 {
-       struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
+       struct kvmppc_spapr_tce_table *stt;
        long ret;
        unsigned long idx;
        struct page *page;
        u64 *tbl;
 
+       stt = kvmppc_find_table(vcpu->kvm, liobn);
        if (!stt)
                return H_TOO_HARD;
 
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 8359752b3efcc616aa2509ee1bc0ae68f58deb44..68d68983948e13813221627e0dcfe743d1c91037 100644
@@ -503,10 +503,18 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
                break;
 unprivileged:
        default:
-               printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn);
-#ifndef DEBUG_SPR
-               emulated = EMULATE_FAIL;
-#endif
+               pr_info_ratelimited("KVM: invalid SPR write: %d\n", sprn);
+               if (sprn & 0x10) {
+                       if (kvmppc_get_msr(vcpu) & MSR_PR) {
+                               kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+                               emulated = EMULATE_AGAIN;
+                       }
+               } else {
+                       if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0) {
+                               kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+                               emulated = EMULATE_AGAIN;
+                       }
+               }
                break;
        }
 
@@ -648,10 +656,20 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
                break;
        default:
 unprivileged:
-               printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
-#ifndef DEBUG_SPR
-               emulated = EMULATE_FAIL;
-#endif
+               pr_info_ratelimited("KVM: invalid SPR read: %d\n", sprn);
+               if (sprn & 0x10) {
+                       if (kvmppc_get_msr(vcpu) & MSR_PR) {
+                               kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+                               emulated = EMULATE_AGAIN;
+                       }
+               } else {
+                       if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0 ||
+                           sprn == 4 || sprn == 5 || sprn == 6) {
+                               kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+                               emulated = EMULATE_AGAIN;
+                       }
+               }
+
                break;
        }
 
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1ec86d9e2a82a32b28cd175951048b3d2cef118e..06b7d8ae27e52240ed2670d0dae759165e48afb6 100644
@@ -3624,11 +3624,9 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
                return -EIO;
 
        mutex_lock(&kvm->lock);
+       if (!kvm->arch.pimap)
+               goto unlock;
 
-       if (kvm->arch.pimap == NULL) {
-               mutex_unlock(&kvm->lock);
-               return 0;
-       }
        pimap = kvm->arch.pimap;
 
        for (i = 0; i < pimap->n_mapped; i++) {
@@ -3650,7 +3648,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
         * We don't free this structure even when the count goes to
         * zero. The structure is freed when we destroy the VM.
         */
-
+ unlock:
        mutex_unlock(&kvm->lock);
        return 0;
 }
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d4dfc0ca2a4444f56b0b47a38ae50e5f1fe23067..f026b062c0ed22881b000d5bff5605f23bb8f5ed 100644
@@ -537,8 +537,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        int r = RESUME_GUEST;
        int relocated;
        int page_found = 0;
-       struct kvmppc_pte pte;
-       bool is_mmio = false;
+       struct kvmppc_pte pte = { 0 };
        bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false;
        bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;
        u64 vsid;
@@ -616,8 +615,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                /* Page not found in guest SLB */
                kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
                kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
-       } else if (!is_mmio &&
-                  kvmppc_visible_gpa(vcpu, pte.raddr)) {
+       } else if (kvmppc_visible_gpa(vcpu, pte.raddr)) {
                if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
                        /*
                         * There is already a host HPTE there, presumably
@@ -627,7 +625,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        kvmppc_mmu_unmap_page(vcpu, &pte);
                }
                /* The guest's PTE is not mapped yet. Map on the host */
-               kvmppc_mmu_map_page(vcpu, &pte, iswrite);
+               if (kvmppc_mmu_map_page(vcpu, &pte, iswrite) == -EIO) {
+                       /* Exit KVM if mapping failed */
+                       run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+                       return RESUME_HOST;
+               }
                if (data)
                        vcpu->stat.sp_storage++;
                else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
index 0514cbd4e533ac01834df77ba12090495ddb5f09..3c296c2eacf8b91404507044b9ebe3da5e78f81b 100644 (file)
@@ -300,6 +300,11 @@ void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
        kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
 }
 
+void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu)
+{
+       kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
+}
+
 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
 {
        kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
index 0fda4230f6c0f878f7cf2e0957ae941d489326fd..77fd043b3ecc50857ac25cc266fd559be715e9de 100644 (file)
@@ -797,9 +797,8 @@ int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500)
        host_tlb_params[0].sets =
                host_tlb_params[0].entries / host_tlb_params[0].ways;
        host_tlb_params[1].sets = 1;
-
-       vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) *
-                                          host_tlb_params[1].entries,
+       vcpu_e500->h2g_tlb1_rmap = kcalloc(host_tlb_params[1].entries,
+                                          sizeof(*vcpu_e500->h2g_tlb1_rmap),
                                           GFP_KERNEL);
        if (!vcpu_e500->h2g_tlb1_rmap)
                return -EINVAL;
index b379146de55bf13d76aa5882dd342148db687fb8..c873ffe553624f32ec8e5619981cd12e68fcb61c 100644 (file)
@@ -259,10 +259,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
                case OP_31_XOP_MFSPR:
                        emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt);
+                       if (emulated == EMULATE_AGAIN) {
+                               emulated = EMULATE_DONE;
+                               advance = 0;
+                       }
                        break;
 
                case OP_31_XOP_MTSPR:
                        emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
+                       if (emulated == EMULATE_AGAIN) {
+                               emulated = EMULATE_DONE;
+                               advance = 0;
+                       }
                        break;
 
                case OP_31_XOP_TLBSYNC:
index 6d3c0ee1d744bbd37c3f7052f9415afee687915e..af833531af31912e727e39c5146d966c3b4609ee 100644 (file)
 #include "timing.h"
 #include "trace.h"
 
-/* XXX to do:
- * lhax
- * lhaux
- * lswx
- * lswi
- * stswx
- * stswi
- * lha
- * lhau
- * lmw
- * stmw
+#ifdef CONFIG_PPC_FPU
+static bool kvmppc_check_fp_disabled(struct kvm_vcpu *vcpu)
+{
+       if (!(kvmppc_get_msr(vcpu) & MSR_FP)) {
+               kvmppc_core_queue_fpunavail(vcpu);
+               return true;
+       }
+
+       return false;
+}
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_VSX
+static bool kvmppc_check_vsx_disabled(struct kvm_vcpu *vcpu)
+{
+       if (!(kvmppc_get_msr(vcpu) & MSR_VSX)) {
+               kvmppc_core_queue_vsx_unavail(vcpu);
+               return true;
+       }
+
+       return false;
+}
+#endif /* CONFIG_VSX */
+
+/*
+ * XXX to do:
+ * vector loads and stores
  *
+ * Instructions that trap when used on cache-inhibited mappings
+ * are not emulated here: multiple and string instructions,
+ * lq/stq, and the load-reserve/store-conditional instructions.
  */
 int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 {
@@ -66,6 +86,19 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
        rs = get_rs(inst);
        rt = get_rt(inst);
 
+       /*
+        * if mmio_vsx_tx_sx_enabled == 0, copy data between
+        * VSR[0..31] and memory
+        * if mmio_vsx_tx_sx_enabled == 1, copy data between
+        * VSR[32..63] and memory
+        */
+       vcpu->arch.mmio_vsx_tx_sx_enabled = get_tx_or_sx(inst);
+       vcpu->arch.mmio_vsx_copy_nums = 0;
+       vcpu->arch.mmio_vsx_offset = 0;
+       vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_NONE;
+       vcpu->arch.mmio_sp64_extend = 0;
+       vcpu->arch.mmio_sign_extend = 0;
+
        switch (get_op(inst)) {
        case 31:
                switch (get_xop(inst)) {
@@ -73,6 +106,11 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
                        emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
                        break;
 
+               case OP_31_XOP_LWZUX:
+                       emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+                       kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+                       break;
+
                case OP_31_XOP_LBZX:
                        emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
                        break;
@@ -82,22 +120,36 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
                        kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                        break;
 
+               case OP_31_XOP_STDX:
+                       emulated = kvmppc_handle_store(run, vcpu,
+                                       kvmppc_get_gpr(vcpu, rs), 8, 1);
+                       break;
+
+               case OP_31_XOP_STDUX:
+                       emulated = kvmppc_handle_store(run, vcpu,
+                                       kvmppc_get_gpr(vcpu, rs), 8, 1);
+                       kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+                       break;
+
                case OP_31_XOP_STWX:
                        emulated = kvmppc_handle_store(run, vcpu,
-                                                      kvmppc_get_gpr(vcpu, rs),
-                                                      4, 1);
+                                       kvmppc_get_gpr(vcpu, rs), 4, 1);
+                       break;
+
+               case OP_31_XOP_STWUX:
+                       emulated = kvmppc_handle_store(run, vcpu,
+                                       kvmppc_get_gpr(vcpu, rs), 4, 1);
+                       kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                        break;
 
                case OP_31_XOP_STBX:
                        emulated = kvmppc_handle_store(run, vcpu,
-                                                      kvmppc_get_gpr(vcpu, rs),
-                                                      1, 1);
+                                       kvmppc_get_gpr(vcpu, rs), 1, 1);
                        break;
 
                case OP_31_XOP_STBUX:
                        emulated = kvmppc_handle_store(run, vcpu,
-                                                      kvmppc_get_gpr(vcpu, rs),
-                                                      1, 1);
+                                       kvmppc_get_gpr(vcpu, rs), 1, 1);
                        kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                        break;
 
@@ -105,6 +157,11 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
                        emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
                        break;
 
+               case OP_31_XOP_LHAUX:
+                       emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+                       kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+                       break;
+
                case OP_31_XOP_LHZX:
                        emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
                        break;
@@ -116,14 +173,12 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 
                case OP_31_XOP_STHX:
                        emulated = kvmppc_handle_store(run, vcpu,
-                                                      kvmppc_get_gpr(vcpu, rs),
-                                                      2, 1);
+                                       kvmppc_get_gpr(vcpu, rs), 2, 1);
                        break;
 
                case OP_31_XOP_STHUX:
                        emulated = kvmppc_handle_store(run, vcpu,
-                                                      kvmppc_get_gpr(vcpu, rs),
-                                                      2, 1);
+                                       kvmppc_get_gpr(vcpu, rs), 2, 1);
                        kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                        break;
 
@@ -143,8 +198,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 
                case OP_31_XOP_STWBRX:
                        emulated = kvmppc_handle_store(run, vcpu,
-                                                      kvmppc_get_gpr(vcpu, rs),
-                                                      4, 0);
+                                       kvmppc_get_gpr(vcpu, rs), 4, 0);
                        break;
 
                case OP_31_XOP_LHBRX:
@@ -153,10 +207,258 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 
                case OP_31_XOP_STHBRX:
                        emulated = kvmppc_handle_store(run, vcpu,
-                                                      kvmppc_get_gpr(vcpu, rs),
-                                                      2, 0);
+                                       kvmppc_get_gpr(vcpu, rs), 2, 0);
+                       break;
+
+               case OP_31_XOP_LDBRX:
+                       emulated = kvmppc_handle_load(run, vcpu, rt, 8, 0);
+                       break;
+
+               case OP_31_XOP_STDBRX:
+                       emulated = kvmppc_handle_store(run, vcpu,
+                                       kvmppc_get_gpr(vcpu, rs), 8, 0);
+                       break;
+
+               case OP_31_XOP_LDX:
+                       emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
+                       break;
+
+               case OP_31_XOP_LDUX:
+                       emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
+                       kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+                       break;
+
+               case OP_31_XOP_LWAX:
+                       emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1);
+                       break;
+
+               case OP_31_XOP_LWAUX:
+                       emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1);
+                       kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+                       break;
+
+#ifdef CONFIG_PPC_FPU
+               case OP_31_XOP_LFSX:
+                       if (kvmppc_check_fp_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_sp64_extend = 1;
+                       emulated = kvmppc_handle_load(run, vcpu,
+                               KVM_MMIO_REG_FPR|rt, 4, 1);
+                       break;
+
+               case OP_31_XOP_LFSUX:
+                       if (kvmppc_check_fp_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_sp64_extend = 1;
+                       emulated = kvmppc_handle_load(run, vcpu,
+                               KVM_MMIO_REG_FPR|rt, 4, 1);
+                       kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+                       break;
+
+               case OP_31_XOP_LFDX:
+                       if (kvmppc_check_fp_disabled(vcpu))
+                               return EMULATE_DONE;
+                       emulated = kvmppc_handle_load(run, vcpu,
+                               KVM_MMIO_REG_FPR|rt, 8, 1);
+                       break;
+
+               case OP_31_XOP_LFDUX:
+                       if (kvmppc_check_fp_disabled(vcpu))
+                               return EMULATE_DONE;
+                       emulated = kvmppc_handle_load(run, vcpu,
+                               KVM_MMIO_REG_FPR|rt, 8, 1);
+                       kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+                       break;
+
+               case OP_31_XOP_LFIWAX:
+                       if (kvmppc_check_fp_disabled(vcpu))
+                               return EMULATE_DONE;
+                       emulated = kvmppc_handle_loads(run, vcpu,
+                               KVM_MMIO_REG_FPR|rt, 4, 1);
+                       break;
+
+               case OP_31_XOP_LFIWZX:
+                       if (kvmppc_check_fp_disabled(vcpu))
+                               return EMULATE_DONE;
+                       emulated = kvmppc_handle_load(run, vcpu,
+                               KVM_MMIO_REG_FPR|rt, 4, 1);
+                       break;
+
+               case OP_31_XOP_STFSX:
+                       if (kvmppc_check_fp_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_sp64_extend = 1;
+                       emulated = kvmppc_handle_store(run, vcpu,
+                               VCPU_FPR(vcpu, rs), 4, 1);
+                       break;
+
+               case OP_31_XOP_STFSUX:
+                       if (kvmppc_check_fp_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_sp64_extend = 1;
+                       emulated = kvmppc_handle_store(run, vcpu,
+                               VCPU_FPR(vcpu, rs), 4, 1);
+                       kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+                       break;
+
+               case OP_31_XOP_STFDX:
+                       if (kvmppc_check_fp_disabled(vcpu))
+                               return EMULATE_DONE;
+                       emulated = kvmppc_handle_store(run, vcpu,
+                               VCPU_FPR(vcpu, rs), 8, 1);
+                       break;
+
+               case OP_31_XOP_STFDUX:
+                       if (kvmppc_check_fp_disabled(vcpu))
+                               return EMULATE_DONE;
+                       emulated = kvmppc_handle_store(run, vcpu,
+                               VCPU_FPR(vcpu, rs), 8, 1);
+                       kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+                       break;
+
+               case OP_31_XOP_STFIWX:
+                       if (kvmppc_check_fp_disabled(vcpu))
+                               return EMULATE_DONE;
+                       emulated = kvmppc_handle_store(run, vcpu,
+                               VCPU_FPR(vcpu, rs), 4, 1);
+                       break;
+#endif
+
+#ifdef CONFIG_VSX
+               case OP_31_XOP_LXSDX:
+                       if (kvmppc_check_vsx_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_vsx_copy_nums = 1;
+                       vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+                       emulated = kvmppc_handle_vsx_load(run, vcpu,
+                               KVM_MMIO_REG_VSX|rt, 8, 1, 0);
+                       break;
+
+               case OP_31_XOP_LXSSPX:
+                       if (kvmppc_check_vsx_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_vsx_copy_nums = 1;
+                       vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+                       vcpu->arch.mmio_sp64_extend = 1;
+                       emulated = kvmppc_handle_vsx_load(run, vcpu,
+                               KVM_MMIO_REG_VSX|rt, 4, 1, 0);
+                       break;
+
+               case OP_31_XOP_LXSIWAX:
+                       if (kvmppc_check_vsx_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_vsx_copy_nums = 1;
+                       vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+                       emulated = kvmppc_handle_vsx_load(run, vcpu,
+                               KVM_MMIO_REG_VSX|rt, 4, 1, 1);
+                       break;
+
+               case OP_31_XOP_LXSIWZX:
+                       if (kvmppc_check_vsx_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_vsx_copy_nums = 1;
+                       vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+                       emulated = kvmppc_handle_vsx_load(run, vcpu,
+                               KVM_MMIO_REG_VSX|rt, 4, 1, 0);
+                       break;
+
+               case OP_31_XOP_LXVD2X:
+               /*
+                * The emulated load/store for these VSX instructions goes
+                * through three steps:
+                *
+                * Step 1: exit from the guest via the page-fault handler;
+                * KVM saves the VSRs (see
+                * guest_exit_cont->store_fp_state->SAVE_32VSRS for reference).
+                *
+                * Step 2: copy the data between memory and the VCPU.
+                * For LXVD2X/STXVD2X/LXVW4X/STXVW4X, one 16-byte access is
+                * emulated as 2 copies of 8 bytes or 4 copies of 4 bytes.
+                * Endianness matters here as well: see the LXVD2X_ROT/
+                * STXVD2X_ROT macros.  On a little-endian host KVM uses
+                * XXSWAPD for LXVD2X_ROT/STXVD2X_ROT, so the positions in
+                * memory are swapped accordingly.
+                *
+                * Step 3: return to the guest; KVM restores the registers
+                * (see kvmppc_hv_entry->load_fp_state->REST_32VSRS).
+                */
+                       if (kvmppc_check_vsx_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_vsx_copy_nums = 2;
+                       vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+                       emulated = kvmppc_handle_vsx_load(run, vcpu,
+                               KVM_MMIO_REG_VSX|rt, 8, 1, 0);
+                       break;
+
+               case OP_31_XOP_LXVW4X:
+                       if (kvmppc_check_vsx_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_vsx_copy_nums = 4;
+                       vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD;
+                       emulated = kvmppc_handle_vsx_load(run, vcpu,
+                               KVM_MMIO_REG_VSX|rt, 4, 1, 0);
+                       break;
+
+               case OP_31_XOP_LXVDSX:
+                       if (kvmppc_check_vsx_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_vsx_copy_nums = 1;
+                       vcpu->arch.mmio_vsx_copy_type =
+                                KVMPPC_VSX_COPY_DWORD_LOAD_DUMP;
+                       emulated = kvmppc_handle_vsx_load(run, vcpu,
+                               KVM_MMIO_REG_VSX|rt, 8, 1, 0);
+                       break;
+
+               case OP_31_XOP_STXSDX:
+                       if (kvmppc_check_vsx_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_vsx_copy_nums = 1;
+                       vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+                       emulated = kvmppc_handle_vsx_store(run, vcpu,
+                                                rs, 8, 1);
                        break;
 
+               case OP_31_XOP_STXSSPX:
+                       if (kvmppc_check_vsx_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_vsx_copy_nums = 1;
+                       vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+                       vcpu->arch.mmio_sp64_extend = 1;
+                       emulated = kvmppc_handle_vsx_store(run, vcpu,
+                                                rs, 4, 1);
+                       break;
+
+               case OP_31_XOP_STXSIWX:
+                       if (kvmppc_check_vsx_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_vsx_offset = 1;
+                       vcpu->arch.mmio_vsx_copy_nums = 1;
+                       vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD;
+                       emulated = kvmppc_handle_vsx_store(run, vcpu,
+                                                        rs, 4, 1);
+                       break;
+
+               case OP_31_XOP_STXVD2X:
+                       if (kvmppc_check_vsx_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_vsx_copy_nums = 2;
+                       vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+                       emulated = kvmppc_handle_vsx_store(run, vcpu,
+                                                        rs, 8, 1);
+                       break;
+
+               case OP_31_XOP_STXVW4X:
+                       if (kvmppc_check_vsx_disabled(vcpu))
+                               return EMULATE_DONE;
+                       vcpu->arch.mmio_vsx_copy_nums = 4;
+                       vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD;
+                       emulated = kvmppc_handle_vsx_store(run, vcpu,
+                                                        rs, 4, 1);
+                       break;
+#endif /* CONFIG_VSX */
                default:
                        emulated = EMULATE_FAIL;
                        break;
@@ -167,10 +469,60 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
                emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
                break;
 
-       /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. */
+#ifdef CONFIG_PPC_FPU
+       case OP_STFS:
+               if (kvmppc_check_fp_disabled(vcpu))
+                       return EMULATE_DONE;
+               vcpu->arch.mmio_sp64_extend = 1;
+               emulated = kvmppc_handle_store(run, vcpu,
+                       VCPU_FPR(vcpu, rs),
+                       4, 1);
+               break;
+
+       case OP_STFSU:
+               if (kvmppc_check_fp_disabled(vcpu))
+                       return EMULATE_DONE;
+               vcpu->arch.mmio_sp64_extend = 1;
+               emulated = kvmppc_handle_store(run, vcpu,
+                       VCPU_FPR(vcpu, rs),
+                       4, 1);
+               kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+               break;
+
+       case OP_STFD:
+               if (kvmppc_check_fp_disabled(vcpu))
+                       return EMULATE_DONE;
+               emulated = kvmppc_handle_store(run, vcpu,
+                       VCPU_FPR(vcpu, rs),
+                                      8, 1);
+               break;
+
+       case OP_STFDU:
+               if (kvmppc_check_fp_disabled(vcpu))
+                       return EMULATE_DONE;
+               emulated = kvmppc_handle_store(run, vcpu,
+                       VCPU_FPR(vcpu, rs),
+                                      8, 1);
+               kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+               break;
+#endif
+
        case OP_LD:
                rt = get_rt(inst);
-               emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
+               switch (inst & 3) {
+               case 0: /* ld */
+                       emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
+                       break;
+               case 1: /* ldu */
+                       emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
+                       kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+                       break;
+               case 2: /* lwa */
+                       emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1);
+                       break;
+               default:
+                       emulated = EMULATE_FAIL;
+               }
                break;
 
        case OP_LWZU:
@@ -193,31 +545,37 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
                                               4, 1);
                break;
 
-       /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */
        case OP_STD:
                rs = get_rs(inst);
-               emulated = kvmppc_handle_store(run, vcpu,
-                                              kvmppc_get_gpr(vcpu, rs),
-                                              8, 1);
+               switch (inst & 3) {
+               case 0: /* std */
+                       emulated = kvmppc_handle_store(run, vcpu,
+                               kvmppc_get_gpr(vcpu, rs), 8, 1);
+                       break;
+               case 1: /* stdu */
+                       emulated = kvmppc_handle_store(run, vcpu,
+                               kvmppc_get_gpr(vcpu, rs), 8, 1);
+                       kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+                       break;
+               default:
+                       emulated = EMULATE_FAIL;
+               }
                break;
 
        case OP_STWU:
                emulated = kvmppc_handle_store(run, vcpu,
-                                              kvmppc_get_gpr(vcpu, rs),
-                                              4, 1);
+                               kvmppc_get_gpr(vcpu, rs), 4, 1);
                kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                break;
 
        case OP_STB:
                emulated = kvmppc_handle_store(run, vcpu,
-                                              kvmppc_get_gpr(vcpu, rs),
-                                              1, 1);
+                               kvmppc_get_gpr(vcpu, rs), 1, 1);
                break;
 
        case OP_STBU:
                emulated = kvmppc_handle_store(run, vcpu,
-                                              kvmppc_get_gpr(vcpu, rs),
-                                              1, 1);
+                               kvmppc_get_gpr(vcpu, rs), 1, 1);
                kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                break;
 
@@ -241,16 +599,48 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 
        case OP_STH:
                emulated = kvmppc_handle_store(run, vcpu,
-                                              kvmppc_get_gpr(vcpu, rs),
-                                              2, 1);
+                               kvmppc_get_gpr(vcpu, rs), 2, 1);
                break;
 
        case OP_STHU:
                emulated = kvmppc_handle_store(run, vcpu,
-                                              kvmppc_get_gpr(vcpu, rs),
-                                              2, 1);
+                               kvmppc_get_gpr(vcpu, rs), 2, 1);
+               kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+               break;
+
+#ifdef CONFIG_PPC_FPU
+       case OP_LFS:
+               if (kvmppc_check_fp_disabled(vcpu))
+                       return EMULATE_DONE;
+               vcpu->arch.mmio_sp64_extend = 1;
+               emulated = kvmppc_handle_load(run, vcpu,
+                       KVM_MMIO_REG_FPR|rt, 4, 1);
+               break;
+
+       case OP_LFSU:
+               if (kvmppc_check_fp_disabled(vcpu))
+                       return EMULATE_DONE;
+               vcpu->arch.mmio_sp64_extend = 1;
+               emulated = kvmppc_handle_load(run, vcpu,
+                       KVM_MMIO_REG_FPR|rt, 4, 1);
+               kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+               break;
+
+       case OP_LFD:
+               if (kvmppc_check_fp_disabled(vcpu))
+                       return EMULATE_DONE;
+               emulated = kvmppc_handle_load(run, vcpu,
+                       KVM_MMIO_REG_FPR|rt, 8, 1);
+               break;
+
+       case OP_LFDU:
+               if (kvmppc_check_fp_disabled(vcpu))
+                       return EMULATE_DONE;
+               emulated = kvmppc_handle_load(run, vcpu,
+                       KVM_MMIO_REG_FPR|rt, 8, 1);
                kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
                break;
+#endif
 
        default:
                emulated = EMULATE_FAIL;
index 0e42aa8a279f323d7728c993251c65b788ef27ae..cf725c580fc5771743bdf3b66dfd7569f78bfc87 100644 (file)
@@ -37,6 +37,7 @@
 #include <asm/cputhreads.h>
 #include <asm/irqflags.h>
 #include <asm/iommu.h>
+#include <asm/switch_to.h>
 #include "timing.h"
 #include "irq.h"
 #include "../mm/mmu_decl.h"
@@ -533,6 +534,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 #ifdef CONFIG_PPC_BOOK3S_64
        case KVM_CAP_SPAPR_TCE:
        case KVM_CAP_SPAPR_TCE_64:
+               /* fallthrough */
+       case KVM_CAP_SPAPR_TCE_VFIO:
        case KVM_CAP_PPC_RTAS:
        case KVM_CAP_PPC_FIXUP_HCALL:
        case KVM_CAP_PPC_ENABLE_HCALL:
@@ -801,6 +804,129 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
                kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod);
 }
 
+#ifdef CONFIG_VSX
+static inline int kvmppc_get_vsr_dword_offset(int index)
+{
+       int offset;
+
+       if ((index != 0) && (index != 1))
+               return -1;
+
+#ifdef __BIG_ENDIAN
+       offset =  index;
+#else
+       offset = 1 - index;
+#endif
+
+       return offset;
+}
+
+static inline int kvmppc_get_vsr_word_offset(int index)
+{
+       int offset;
+
+       if ((index > 3) || (index < 0))
+               return -1;
+
+#ifdef __BIG_ENDIAN
+       offset = index;
+#else
+       offset = 3 - index;
+#endif
+       return offset;
+}
+
+static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu,
+       u64 gpr)
+{
+       union kvmppc_one_reg val;
+       int offset = kvmppc_get_vsr_dword_offset(vcpu->arch.mmio_vsx_offset);
+       int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+       if (offset == -1)
+               return;
+
+       if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
+               val.vval = VCPU_VSX_VR(vcpu, index);
+               val.vsxval[offset] = gpr;
+               VCPU_VSX_VR(vcpu, index) = val.vval;
+       } else {
+               VCPU_VSX_FPR(vcpu, index, offset) = gpr;
+       }
+}
+
+static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu,
+       u64 gpr)
+{
+       union kvmppc_one_reg val;
+       int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+       if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
+               val.vval = VCPU_VSX_VR(vcpu, index);
+               val.vsxval[0] = gpr;
+               val.vsxval[1] = gpr;
+               VCPU_VSX_VR(vcpu, index) = val.vval;
+       } else {
+               VCPU_VSX_FPR(vcpu, index, 0) = gpr;
+               VCPU_VSX_FPR(vcpu, index, 1) = gpr;
+       }
+}
+
+static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
+       u32 gpr32)
+{
+       union kvmppc_one_reg val;
+       int offset = kvmppc_get_vsr_word_offset(vcpu->arch.mmio_vsx_offset);
+       int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+       int dword_offset, word_offset;
+
+       if (offset == -1)
+               return;
+
+       if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
+               val.vval = VCPU_VSX_VR(vcpu, index);
+               val.vsx32val[offset] = gpr32;
+               VCPU_VSX_VR(vcpu, index) = val.vval;
+       } else {
+               dword_offset = offset / 2;
+               word_offset = offset % 2;
+               val.vsxval[0] = VCPU_VSX_FPR(vcpu, index, dword_offset);
+               val.vsx32val[word_offset] = gpr32;
+               VCPU_VSX_FPR(vcpu, index, dword_offset) = val.vsxval[0];
+       }
+}
+#endif /* CONFIG_VSX */
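To make the mapping computed by kvmppc_get_vsr_dword_offset()/kvmppc_get_vsr_word_offset() concrete, here is a small standalone program (an illustration only, not kernel code) that mirrors the arithmetic above: on a big-endian host the offset is the index itself, while on a little-endian host the order within the 128-bit VSR is reversed. The bounds checks of the kernel helpers are omitted.

    #include <stdio.h>

    /* Mirrors the helpers above; 'be' selects the __BIG_ENDIAN branch. */
    static int dword_offset(int index, int be) { return be ? index : 1 - index; }
    static int word_offset(int index, int be)  { return be ? index : 3 - index; }

    int main(void)
    {
            int i;

            for (i = 0; i < 2; i++)
                    printf("dword %d -> BE %d, LE %d\n",
                           i, dword_offset(i, 1), dword_offset(i, 0));
            for (i = 0; i < 4; i++)
                    printf("word  %d -> BE %d, LE %d\n",
                           i, word_offset(i, 1), word_offset(i, 0));
            return 0;   /* e.g. dword 0 -> BE 0, LE 1; word 0 -> BE 0, LE 3 */
    }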
+
+#ifdef CONFIG_PPC_FPU
+static inline u64 sp_to_dp(u32 fprs)
+{
+       u64 fprd;
+
+       preempt_disable();
+       enable_kernel_fp();
+       asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : "=m" (fprd) : "m" (fprs)
+            : "fr0");
+       preempt_enable();
+       return fprd;
+}
+
+static inline u32 dp_to_sp(u64 fprd)
+{
+       u32 fprs;
+
+       preempt_disable();
+       enable_kernel_fp();
+       asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : "=m" (fprs) : "m" (fprd)
+            : "fr0");
+       preempt_enable();
+       return fprs;
+}
+
+#else
+#define sp_to_dp(x)    (x)
+#define dp_to_sp(x)    (x)
+#endif /* CONFIG_PPC_FPU */
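These helpers exist because PowerPC FPRs always hold double-precision values: a 4-byte FP MMIO access therefore has to convert between the single-precision memory format and the double-precision register format, and the inline asm lets the FPU (lfs/stfd and lfd/stfs pairs) perform that conversion. A rough standalone equivalent in plain C, ignoring the kernel's preemption/FPU-enable handling and corner cases such as signalling NaNs (illustration only):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint64_t sp_bits_to_dp_bits(uint32_t sp)
    {
            float f;
            double d;
            uint64_t dp;

            memcpy(&f, &sp, sizeof(f));     /* reinterpret the 4 MMIO bytes */
            d = (double)f;                  /* widen, as lfs does */
            memcpy(&dp, &d, sizeof(d));
            return dp;
    }

    int main(void)
    {
            /* 1.0f: 0x3f800000 widens to 0x3ff0000000000000 */
            printf("%#llx\n", (unsigned long long)sp_bits_to_dp_bits(0x3f800000));
            return 0;
    }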
+
 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
                                       struct kvm_run *run)
 {
@@ -827,6 +953,10 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
                }
        }
 
+       /* conversion between single and double precision */
+       if ((vcpu->arch.mmio_sp64_extend) && (run->mmio.len == 4))
+               gpr = sp_to_dp(gpr);
+
        if (vcpu->arch.mmio_sign_extend) {
                switch (run->mmio.len) {
 #ifdef CONFIG_PPC64
@@ -843,8 +973,6 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
                }
        }
 
-       kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
-
        switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) {
        case KVM_MMIO_REG_GPR:
                kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
@@ -860,6 +988,17 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
                VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;
                vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
                break;
+#endif
+#ifdef CONFIG_VSX
+       case KVM_MMIO_REG_VSX:
+               if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_DWORD)
+                       kvmppc_set_vsr_dword(vcpu, gpr);
+               else if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_WORD)
+                       kvmppc_set_vsr_word(vcpu, gpr);
+               else if (vcpu->arch.mmio_vsx_copy_type ==
+                               KVMPPC_VSX_COPY_DWORD_LOAD_DUMP)
+                       kvmppc_set_vsr_dword_dump(vcpu, gpr);
+               break;
 #endif
        default:
                BUG();
@@ -927,6 +1066,35 @@ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
        return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 1);
 }
 
+#ifdef CONFIG_VSX
+int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                       unsigned int rt, unsigned int bytes,
+                       int is_default_endian, int mmio_sign_extend)
+{
+       enum emulation_result emulated = EMULATE_DONE;
+
+       /* Currently, mmio_vsx_copy_nums is only allowed to be 4 or less */
+       if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
+               (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+               return EMULATE_FAIL;
+       }
+
+       while (vcpu->arch.mmio_vsx_copy_nums) {
+               emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
+                       is_default_endian, mmio_sign_extend);
+
+               if (emulated != EMULATE_DONE)
+                       break;
+
+               vcpu->arch.paddr_accessed += run->mmio.len;
+
+               vcpu->arch.mmio_vsx_copy_nums--;
+               vcpu->arch.mmio_vsx_offset++;
+       }
+       return emulated;
+}
+#endif /* CONFIG_VSX */
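Because the generic MMIO path can move at most 8 bytes per exit (run->mmio.data is 8 bytes), a 16-byte VSX access is split into several passes, and the state set up during decode (mmio_vsx_copy_nums, mmio_vsx_offset, paddr_accessed) survives across exits to userspace. A rough timeline for one lxvd2x that hits emulated MMIO, assuming each doubleword has to go out to userspace; this is the editor's summary of the code above and of the kvm_arch_vcpu_ioctl_run() hunk further down, not text from the patch:

    /*
     * decode (emulate_loadstore.c):
     *         mmio_vsx_copy_nums = 2, copy_type = DWORD, mmio_vsx_offset = 0
     * pass 1: kvmppc_handle_vsx_load() -> __kvmppc_handle_load() exits with
     *         KVM_EXIT_MMIO for the first 8 bytes; userspace fills
     *         run->mmio.data and re-enters KVM_RUN;
     *         kvmppc_complete_mmio_load() writes vsxval[dword_offset(0)],
     *         then copy_nums becomes 1 and mmio_vsx_offset becomes 1
     * pass 2: kvmppc_emulate_mmio_vsx_loadstore() advances paddr_accessed by
     *         run->mmio.len and issues the load for the second doubleword;
     *         once it completes, copy_nums reaches 0 and the guest resumes.
     */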
+
 int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        u64 val, unsigned int bytes, int is_default_endian)
 {
@@ -952,6 +1120,9 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
        vcpu->mmio_needed = 1;
        vcpu->mmio_is_write = 1;
 
+       if ((vcpu->arch.mmio_sp64_extend) && (bytes == 4))
+               val = dp_to_sp(val);
+
        /* Store the value at the lowest bytes in 'data'. */
        if (!host_swabbed) {
                switch (bytes) {
@@ -985,6 +1156,129 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 }
 EXPORT_SYMBOL_GPL(kvmppc_handle_store);
 
+#ifdef CONFIG_VSX
+static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
+{
+       u32 dword_offset, word_offset;
+       union kvmppc_one_reg reg;
+       int vsx_offset = 0;
+       int copy_type = vcpu->arch.mmio_vsx_copy_type;
+       int result = 0;
+
+       switch (copy_type) {
+       case KVMPPC_VSX_COPY_DWORD:
+               vsx_offset =
+                       kvmppc_get_vsr_dword_offset(vcpu->arch.mmio_vsx_offset);
+
+               if (vsx_offset == -1) {
+                       result = -1;
+                       break;
+               }
+
+               if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
+                       *val = VCPU_VSX_FPR(vcpu, rs, vsx_offset);
+               } else {
+                       reg.vval = VCPU_VSX_VR(vcpu, rs);
+                       *val = reg.vsxval[vsx_offset];
+               }
+               break;
+
+       case KVMPPC_VSX_COPY_WORD:
+               vsx_offset =
+                       kvmppc_get_vsr_word_offset(vcpu->arch.mmio_vsx_offset);
+
+               if (vsx_offset == -1) {
+                       result = -1;
+                       break;
+               }
+
+               if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
+                       dword_offset = vsx_offset / 2;
+                       word_offset = vsx_offset % 2;
+                       reg.vsxval[0] = VCPU_VSX_FPR(vcpu, rs, dword_offset);
+                       *val = reg.vsx32val[word_offset];
+               } else {
+                       reg.vval = VCPU_VSX_VR(vcpu, rs);
+                       *val = reg.vsx32val[vsx_offset];
+               }
+               break;
+
+       default:
+               result = -1;
+               break;
+       }
+
+       return result;
+}
+
+int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                       int rs, unsigned int bytes, int is_default_endian)
+{
+       u64 val;
+       enum emulation_result emulated = EMULATE_DONE;
+
+       vcpu->arch.io_gpr = rs;
+
+       /* Currently, mmio_vsx_copy_nums is only allowed to be 4 or less */
+       if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
+               (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+               return EMULATE_FAIL;
+       }
+
+       while (vcpu->arch.mmio_vsx_copy_nums) {
+               if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1)
+                       return EMULATE_FAIL;
+
+               emulated = kvmppc_handle_store(run, vcpu,
+                        val, bytes, is_default_endian);
+
+               if (emulated != EMULATE_DONE)
+                       break;
+
+               vcpu->arch.paddr_accessed += run->mmio.len;
+
+               vcpu->arch.mmio_vsx_copy_nums--;
+               vcpu->arch.mmio_vsx_offset++;
+       }
+
+       return emulated;
+}
+
+static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
+                       struct kvm_run *run)
+{
+       enum emulation_result emulated = EMULATE_FAIL;
+       int r;
+
+       vcpu->arch.paddr_accessed += run->mmio.len;
+
+       if (!vcpu->mmio_is_write) {
+               emulated = kvmppc_handle_vsx_load(run, vcpu, vcpu->arch.io_gpr,
+                        run->mmio.len, 1, vcpu->arch.mmio_sign_extend);
+       } else {
+               emulated = kvmppc_handle_vsx_store(run, vcpu,
+                        vcpu->arch.io_gpr, run->mmio.len, 1);
+       }
+
+       switch (emulated) {
+       case EMULATE_DO_MMIO:
+               run->exit_reason = KVM_EXIT_MMIO;
+               r = RESUME_HOST;
+               break;
+       case EMULATE_FAIL:
+               pr_info("KVM: MMIO emulation failed (VSX repeat)\n");
+               run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+               r = RESUME_HOST;
+               break;
+       default:
+               r = RESUME_GUEST;
+               break;
+       }
+       return r;
+}
+#endif /* CONFIG_VSX */
+
 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 {
        int r = 0;
@@ -1087,13 +1381,24 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
        int r;
        sigset_t sigsaved;
 
-       if (vcpu->sigset_active)
-               sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
-
        if (vcpu->mmio_needed) {
+               vcpu->mmio_needed = 0;
                if (!vcpu->mmio_is_write)
                        kvmppc_complete_mmio_load(vcpu, run);
-               vcpu->mmio_needed = 0;
+#ifdef CONFIG_VSX
+               if (vcpu->arch.mmio_vsx_copy_nums > 0) {
+                       vcpu->arch.mmio_vsx_copy_nums--;
+                       vcpu->arch.mmio_vsx_offset++;
+               }
+
+               if (vcpu->arch.mmio_vsx_copy_nums > 0) {
+                       r = kvmppc_emulate_mmio_vsx_loadstore(vcpu, run);
+                       if (r == RESUME_HOST) {
+                               vcpu->mmio_needed = 1;
+                               return r;
+                       }
+               }
+#endif
        } else if (vcpu->arch.osi_needed) {
                u64 *gprs = run->osi.gprs;
                int i;
@@ -1115,6 +1420,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 #endif
        }
 
+       if (vcpu->sigset_active)
+               sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
        if (run->immediate_exit)
                r = -EINTR;
        else
index 497130c5c74203f9988d9d8efbb6bbc76757704c..fc67bd766eaf967df44dc172d8f1792a09d0608b 100644 (file)
@@ -314,6 +314,25 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
 }
 EXPORT_SYMBOL_GPL(mm_iommu_lookup);
 
+struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
+               unsigned long ua, unsigned long size)
+{
+       struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+
+       list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
+                       next) {
+               if ((mem->ua <= ua) &&
+                               (ua + size <= mem->ua +
+                                (mem->entries << PAGE_SHIFT))) {
+                       ret = mem;
+                       break;
+               }
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_lookup_rm);
+
 struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
                unsigned long ua, unsigned long entries)
 {
@@ -345,6 +364,26 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 }
 EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
 
+long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
+               unsigned long ua, unsigned long *hpa)
+{
+       const long entry = (ua - mem->ua) >> PAGE_SHIFT;
+       void *va = &mem->hpas[entry];
+       unsigned long *pa;
+
+       if (entry >= mem->entries)
+               return -EFAULT;
+
+       pa = (void *) vmalloc_to_phys(va);
+       if (!pa)
+               return -EFAULT;
+
+       *hpa = *pa | (ua & ~PAGE_MASK);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm);
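The new _rm ("real mode") variants are intended for the hypervisor's real-mode H_PUT_TCE path, which runs with the MMU off and therefore cannot take locks or dereference vmalloc addresses directly; hence the lockless list walk and the vmalloc_to_phys() translation above. A simplified sketch of how a real-mode caller would use them (the actual caller lives in book3s_64_vio_hv.c; the function name and error handling here are the editor's):

    static long rm_ua_to_hpa_example(struct mm_struct *mm, unsigned long ua,
                                     unsigned long *hpa)
    {
            struct mm_iommu_table_group_mem_t *mem;

            mem = mm_iommu_lookup_rm(mm, ua, 1ULL << IOMMU_PAGE_SHIFT_4K);
            if (!mem)
                    return H_TOO_HARD;      /* retry in virtual mode */

            if (mm_iommu_ua_to_hpa_rm(mem, ua, hpa))
                    return H_HARDWARE;

            return H_SUCCESS;
    }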
+
 long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
 {
        if (atomic64_inc_not_zero(&mem->mapped))
index e36738291c320575523422e139d4642e04142bd5..ee4cdb5b893f2acd0a20c953f0da0e4283889fa3 100644 (file)
@@ -1424,8 +1424,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
                iommu_group_put(pe->table_group.group);
                BUG_ON(pe->table_group.group);
        }
-       pnv_pci_ioda2_table_free_pages(tbl);
-       iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
+       iommu_tce_table_put(tbl);
 }
 
 static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
@@ -1860,6 +1859,17 @@ static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
 
        return ret;
 }
+
+static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index,
+               unsigned long *hpa, enum dma_data_direction *direction)
+{
+       long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+
+       if (!ret)
+               pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true);
+
+       return ret;
+}
 #endif
 
 static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
@@ -1874,6 +1884,7 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
        .set = pnv_ioda1_tce_build,
 #ifdef CONFIG_IOMMU_API
        .exchange = pnv_ioda1_tce_xchg,
+       .exchange_rm = pnv_ioda1_tce_xchg_rm,
 #endif
        .clear = pnv_ioda1_tce_free,
        .get = pnv_tce_get,
@@ -1948,7 +1959,7 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
 {
        struct iommu_table_group_link *tgl;
 
-       list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+       list_for_each_entry_lockless(tgl, &tbl->it_group_list, next) {
                struct pnv_ioda_pe *pe = container_of(tgl->table_group,
                                struct pnv_ioda_pe, table_group);
                struct pnv_phb *phb = pe->phb;
@@ -2004,6 +2015,17 @@ static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
 
        return ret;
 }
+
+static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index,
+               unsigned long *hpa, enum dma_data_direction *direction)
+{
+       long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+
+       if (!ret)
+               pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true);
+
+       return ret;
+}
 #endif
 
 static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
@@ -2017,13 +2039,13 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
 static void pnv_ioda2_table_free(struct iommu_table *tbl)
 {
        pnv_pci_ioda2_table_free_pages(tbl);
-       iommu_free_table(tbl, "pnv");
 }
 
 static struct iommu_table_ops pnv_ioda2_iommu_ops = {
        .set = pnv_ioda2_tce_build,
 #ifdef CONFIG_IOMMU_API
        .exchange = pnv_ioda2_tce_xchg,
+       .exchange_rm = pnv_ioda2_tce_xchg_rm,
 #endif
        .clear = pnv_ioda2_tce_free,
        .get = pnv_tce_get,
@@ -2203,7 +2225,7 @@ found:
                __free_pages(tce_mem, get_order(tce32_segsz * segs));
        if (tbl) {
                pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
-               iommu_free_table(tbl, "pnv");
+               iommu_tce_table_put(tbl);
        }
 }
 
@@ -2293,16 +2315,16 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
        if (!tbl)
                return -ENOMEM;
 
+       tbl->it_ops = &pnv_ioda2_iommu_ops;
+
        ret = pnv_pci_ioda2_table_alloc_pages(nid,
                        bus_offset, page_shift, window_size,
                        levels, tbl);
        if (ret) {
-               iommu_free_table(tbl, "pnv");
+               iommu_tce_table_put(tbl);
                return ret;
        }
 
-       tbl->it_ops = &pnv_ioda2_iommu_ops;
-
        *ptbl = tbl;
 
        return 0;
@@ -2343,7 +2365,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
        if (rc) {
                pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n",
                                rc);
-               pnv_ioda2_table_free(tbl);
+               iommu_tce_table_put(tbl);
                return rc;
        }
 
@@ -2431,7 +2453,7 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
        pnv_pci_ioda2_unset_window(&pe->table_group, 0);
        if (pe->pbus)
                pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
-       pnv_ioda2_table_free(tbl);
+       iommu_tce_table_put(tbl);
 }
 
 static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
@@ -3406,7 +3428,7 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
        }
 
        free_pages(tbl->it_base, get_order(tbl->it_size << 3));
-       iommu_free_table(tbl, "pnv");
+       iommu_tce_table_put(tbl);
 }
 
 static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
@@ -3433,7 +3455,7 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
        }
 
        pnv_pci_ioda2_table_free_pages(tbl);
-       iommu_free_table(tbl, "pnv");
+       iommu_tce_table_put(tbl);
 }
 
 static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe,
index eb835e977e33a046a9f99b6960dfd03ba46298b9..204a829ff506e6a688d3c530ef90029a8e91659b 100644 (file)
@@ -767,6 +767,7 @@ struct iommu_table *pnv_pci_table_alloc(int nid)
 
        tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid);
        INIT_LIST_HEAD_RCU(&tbl->it_group_list);
+       kref_init(&tbl->it_kref);
 
        return tbl;
 }
index 4d757eaa46bf70cf2d1b757e6abe987fa7aa5647..7ce5db209abfb107e9584850afd753f9b6b2cf23 100644 (file)
@@ -74,6 +74,7 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
                goto fail_exit;
 
        INIT_LIST_HEAD_RCU(&tbl->it_group_list);
+       kref_init(&tbl->it_kref);
        tgl->table_group = table_group;
        list_add_rcu(&tgl->next, &tbl->it_group_list);
 
@@ -115,7 +116,7 @@ static void iommu_pseries_free_group(struct iommu_table_group *table_group,
                BUG_ON(table_group->group);
        }
 #endif
-       iommu_free_table(tbl, node_name);
+       iommu_tce_table_put(tbl);
 
        kfree(table_group);
 }
index 7204939324863566ef98ea6d387248a362cd1a87..28b09fd797ec649a468599d32ce126b7c0925d6d 100644 (file)
@@ -1318,7 +1318,7 @@ static void vio_dev_release(struct device *dev)
        struct iommu_table *tbl = get_iommu_table_base(dev);
 
        if (tbl)
-               iommu_free_table(tbl, of_node_full_name(dev->of_node));
+               iommu_tce_table_put(tbl);
        of_node_put(dev->of_node);
        kfree(to_vio_dev(dev));
 }
index de5d572225f3adb4c1cdab6e30c2dc3383ae02c1..cd1fa97776c302e03aa60267624e5d8bb56c2092 100644 (file)
@@ -302,8 +302,8 @@ extern int ignore_sigio_fd(int fd);
 extern void maybe_sigio_broken(int fd, int read);
 extern void sigio_broken(int fd, int read);
 
-/* sys-x86_64/prctl.c */
-extern int os_arch_prctl(int pid, int code, unsigned long *addr);
+/* prctl.c */
+extern int os_arch_prctl(int pid, int option, unsigned long *arg2);
 
 /* tty.c */
 extern int get_pty(void);
index 9ba050fe47f30e6eff1d119ed33fa1505b2a661d..0af59fa789ea6fd250f79125cad3aef01468a39b 100644 (file)
 381    i386    pkey_alloc              sys_pkey_alloc
 382    i386    pkey_free               sys_pkey_free
 383    i386    statx                   sys_statx
+384    i386    arch_prctl              sys_arch_prctl                  compat_sys_arch_prctl
index b04bb6dfed7f8464c1425df50c0fa9d1481dcee2..0fe00446f9cac8c16e1ae3fcabc4a469b772597b 100644 (file)
  * Reuse free bits when adding new feature flags!
  */
 #define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
 #define X86_FEATURE_CPB                ( 7*32+ 2) /* AMD Core Performance Boost */
 #define X86_FEATURE_EPB                ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
 #define X86_FEATURE_CAT_L3     ( 7*32+ 4) /* Cache Allocation Technology L3 */
index d962fa998a6fc523e0e9ed5c90aad9d452c6601d..f5c942edbc86c050ca595aa450b3a254c15f2233 100644 (file)
@@ -611,6 +611,8 @@ struct kvm_vcpu_arch {
        unsigned long dr7;
        unsigned long eff_db[KVM_NR_DB_REGS];
        unsigned long guest_debug_dr7;
+       u64 msr_platform_info;
+       u64 msr_misc_features_enables;
 
        u64 mcg_cap;
        u64 mcg_status;
@@ -726,6 +728,7 @@ struct kvm_hv {
 
 enum kvm_irqchip_mode {
        KVM_IRQCHIP_NONE,
+       KVM_IRQCHIP_INIT_IN_PROGRESS, /* temporarily set during creation */
        KVM_IRQCHIP_KERNEL,       /* created with KVM_CREATE_IRQCHIP */
        KVM_IRQCHIP_SPLIT,        /* created with KVM_CAP_SPLIT_IRQCHIP */
 };
index d8b5f8ab8ef9e79fb76586bc3ca0f7f27325123e..673f9ac50f6d12612612e8efcce4eab0ef98bcbb 100644 (file)
@@ -45,6 +45,8 @@
 #define MSR_IA32_PERFCTR1              0x000000c2
 #define MSR_FSB_FREQ                   0x000000cd
 #define MSR_PLATFORM_INFO              0x000000ce
+#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT      31
+#define MSR_PLATFORM_INFO_CPUID_FAULT          BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
 
 #define MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
 #define NHM_C3_AUTO_DEMOTE             (1UL << 25)
 
 /* DEBUGCTLMSR bits (others vary by model): */
 #define DEBUGCTLMSR_LBR                        (1UL <<  0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT          1
 #define DEBUGCTLMSR_BTF                        (1UL <<  1) /* single-step on branches */
 #define DEBUGCTLMSR_TR                 (1UL <<  6)
 #define DEBUGCTLMSR_BTS                        (1UL <<  7)
 #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT       39
 #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE           (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
 
-/* MISC_FEATURE_ENABLES non-architectural features */
-#define MSR_MISC_FEATURE_ENABLES       0x00000140
+/* MISC_FEATURES_ENABLES non-architectural features */
+#define MSR_MISC_FEATURES_ENABLES      0x00000140
 
-#define MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT                1
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT      0
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT          BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
+#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT       1
 
 #define MSR_IA32_TSC_DEADLINE          0x000006E0
 
index f385eca5407a0f47770564506bd918ac686102c4..a80c1b3997ed00047c93af99a68ef0b10aedb2e7 100644 (file)
@@ -884,6 +884,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
+DECLARE_PER_CPU(u64, msr_misc_features_shadow);
+
 /* Register/unregister a process' MPX related resource */
 #define MPX_ENABLE_MANAGEMENT()        mpx_enable_management()
 #define MPX_DISABLE_MANAGEMENT()       mpx_disable_management()
index 9b9b30b1944187c8c3de3ed5c4ef1d82ea7c422d..8d3964fc5f915c37ec7bf74706446adc8ad2e93b 100644 (file)
@@ -9,6 +9,7 @@ void syscall_init(void);
 
 #ifdef CONFIG_X86_64
 void entry_SYSCALL_64(void);
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
 #endif
 
 #ifdef CONFIG_X86_32
@@ -30,6 +31,7 @@ void x86_report_nx(void);
 
 extern int reboot_force;
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
+long do_arch_prctl_common(struct task_struct *task, int option,
+                         unsigned long cpuid_enabled);
 
 #endif /* _ASM_X86_PROTO_H */
index ad6f5eb07a95bd221fe4e13c8cdb3af0cd27aa37..9fc44b95f7cb1097f3c5b55d6f40a7e89f8f9175 100644 (file)
@@ -87,6 +87,7 @@ struct thread_info {
 #define TIF_SECCOMP            8       /* secure computing */
 #define TIF_USER_RETURN_NOTIFY 11      /* notify kernel of userspace return */
 #define TIF_UPROBE             12      /* breakpointed or singlestepping */
+#define TIF_NOCPUID            15      /* CPUID is not accessible in userland */
 #define TIF_NOTSC              16      /* TSC is not accessible in userland */
 #define TIF_IA32               17      /* IA32 compatibility process */
 #define TIF_NOHZ               19      /* in adaptive nohz mode */
@@ -110,6 +111,7 @@ struct thread_info {
 #define _TIF_SECCOMP           (1 << TIF_SECCOMP)
 #define _TIF_USER_RETURN_NOTIFY        (1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE            (1 << TIF_UPROBE)
+#define _TIF_NOCPUID           (1 << TIF_NOCPUID)
 #define _TIF_NOTSC             (1 << TIF_NOTSC)
 #define _TIF_IA32              (1 << TIF_IA32)
 #define _TIF_NOHZ              (1 << TIF_NOHZ)
@@ -138,7 +140,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW                                                        \
-       (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+       (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
@@ -239,6 +241,8 @@ static inline int arch_within_stack_frames(const void * const stack,
 extern void arch_task_cache_init(void);
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 extern void arch_release_task_struct(struct task_struct *tsk);
+extern void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_THREAD_INFO_H */
index fc5abff9b7fd63d6b3a01a18061be8b3f752d109..75d002bdb3f35bcc12c06ec4acc213bc49241ae9 100644 (file)
@@ -110,6 +110,16 @@ static inline void cr4_clear_bits(unsigned long mask)
        }
 }
 
+static inline void cr4_toggle_bits(unsigned long mask)
+{
+       unsigned long cr4;
+
+       cr4 = this_cpu_read(cpu_tlbstate.cr4);
+       cr4 ^= mask;
+       this_cpu_write(cpu_tlbstate.cr4, cr4);
+       __write_cr4(cr4);
+}
+
 /* Read the CR4 shadow. */
 static inline unsigned long cr4_read_shadow(void)
 {
index 835aa51c7f6ebb914592752373f55287ca8cc235..c4576551709216cd51c17e42af20c36a19cd5e7a 100644 (file)
@@ -1,10 +1,13 @@
 #ifndef _ASM_X86_PRCTL_H
 #define _ASM_X86_PRCTL_H
 
-#define ARCH_SET_GS 0x1001
-#define ARCH_SET_FS 0x1002
-#define ARCH_GET_FS 0x1003
-#define ARCH_GET_GS 0x1004
+#define ARCH_SET_GS            0x1001
+#define ARCH_SET_FS            0x1002
+#define ARCH_GET_FS            0x1003
+#define ARCH_GET_GS            0x1004
+
+#define ARCH_GET_CPUID         0x1011
+#define ARCH_SET_CPUID         0x1012
 
 #define ARCH_MAP_VDSO_X32      0x2001
 #define ARCH_MAP_VDSO_32       0x2002
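
[Editorial aside, not part of the patch] The two new sub-commands are meant to be driven through arch_prctl(2). A minimal userspace sketch, assuming glibc exposes SYS_arch_prctl and the CPU advertises CPUID faulting; the fallback defines simply mirror the values added in the hunk above:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef ARCH_GET_CPUID
#define ARCH_GET_CPUID	0x1011		/* values from the hunk above */
#define ARCH_SET_CPUID	0x1012
#endif

int main(void)
{
	/* ARCH_GET_CPUID reports whether CPUID is enabled for this task. */
	long enabled = syscall(SYS_arch_prctl, ARCH_GET_CPUID, 0);

	printf("CPUID currently enabled: %ld\n", enabled);

	/*
	 * ARCH_SET_CPUID with 0 makes a later CPUID raise SIGSEGV; it fails
	 * with ENODEV unless the CPU supports CPUID faulting.
	 */
	if (syscall(SYS_arch_prctl, ARCH_SET_CPUID, 0) == -1)
		perror("ARCH_SET_CPUID");

	return 0;
}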
index 063197771b8d7ba08f2eafe474cacb0efe9e79d3..dfa90a3a5145d784dafdcd201243d2bcde537897 100644 (file)
@@ -90,16 +90,12 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
                return;
        }
 
-       if (ring3mwait_disabled) {
-               msr_clear_bit(MSR_MISC_FEATURE_ENABLES,
-                             MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
+       if (ring3mwait_disabled)
                return;
-       }
-
-       msr_set_bit(MSR_MISC_FEATURE_ENABLES,
-                   MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
 
        set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
+       this_cpu_or(msr_misc_features_shadow,
+                   1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);
 
        if (c == &boot_cpu_data)
                ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
@@ -488,6 +484,34 @@ static void intel_bsp_resume(struct cpuinfo_x86 *c)
        init_intel_energy_perf(c);
 }
 
+static void init_cpuid_fault(struct cpuinfo_x86 *c)
+{
+       u64 msr;
+
+       if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
+               if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
+                       set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
+       }
+}
+
+static void init_intel_misc_features(struct cpuinfo_x86 *c)
+{
+       u64 msr;
+
+       if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
+               return;
+
+       /* Clear all MISC features */
+       this_cpu_write(msr_misc_features_shadow, 0);
+
+       /* Check features and update capabilities and shadow control bits */
+       init_cpuid_fault(c);
+       probe_xeon_phi_r3mwait(c);
+
+       msr = this_cpu_read(msr_misc_features_shadow);
+       wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
+}
+
 static void init_intel(struct cpuinfo_x86 *c)
 {
        unsigned int l2 = 0;
@@ -602,7 +626,7 @@ static void init_intel(struct cpuinfo_x86 *c)
 
        init_intel_energy_perf(c);
 
-       probe_xeon_phi_r3mwait(c);
+       init_intel_misc_features(c);
 }
 
 #ifdef CONFIG_X86_32
index 14f65a5f938e4f829de60e868e7ae8ec4954a6d3..da5c0978998488c612b1495a3659742b57b55071 100644 (file)
@@ -396,9 +396,9 @@ static u64 kvm_steal_clock(int cpu)
        src = &per_cpu(steal_time, cpu);
        do {
                version = src->version;
-               rmb();
+               virt_rmb();
                steal = src->steal;
-               rmb();
+               virt_rmb();
        } while ((version & 1) || (version != src->version));
 
        return steal;
index f675915617110fa4cae6c74efc35ba8ccd12eb46..0bb88428cbf2697c89a60311051cc5351ea55fde 100644 (file)
@@ -37,6 +37,7 @@
 #include <asm/vm86.h>
 #include <asm/switch_to.h>
 #include <asm/desc.h>
+#include <asm/prctl.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -124,11 +125,6 @@ void flush_thread(void)
        fpu__clear(&tsk->thread.fpu);
 }
 
-static void hard_disable_TSC(void)
-{
-       cr4_set_bits(X86_CR4_TSD);
-}
-
 void disable_TSC(void)
 {
        preempt_disable();
@@ -137,15 +133,10 @@ void disable_TSC(void)
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
-               hard_disable_TSC();
+               cr4_set_bits(X86_CR4_TSD);
        preempt_enable();
 }
 
-static void hard_enable_TSC(void)
-{
-       cr4_clear_bits(X86_CR4_TSD);
-}
-
 static void enable_TSC(void)
 {
        preempt_disable();
@@ -154,7 +145,7 @@ static void enable_TSC(void)
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
-               hard_enable_TSC();
+               cr4_clear_bits(X86_CR4_TSD);
        preempt_enable();
 }
 
@@ -182,54 +173,129 @@ int set_tsc_mode(unsigned int val)
        return 0;
 }
 
-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
-                     struct tss_struct *tss)
-{
-       struct thread_struct *prev, *next;
-
-       prev = &prev_p->thread;
-       next = &next_p->thread;
+DEFINE_PER_CPU(u64, msr_misc_features_shadow);
 
-       if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
-           test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
-               unsigned long debugctl = get_debugctlmsr();
+static void set_cpuid_faulting(bool on)
+{
+       u64 msrval;
 
-               debugctl &= ~DEBUGCTLMSR_BTF;
-               if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
-                       debugctl |= DEBUGCTLMSR_BTF;
+       msrval = this_cpu_read(msr_misc_features_shadow);
+       msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
+       msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT);
+       this_cpu_write(msr_misc_features_shadow, msrval);
+       wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval);
+}
 
-               update_debugctlmsr(debugctl);
+static void disable_cpuid(void)
+{
+       preempt_disable();
+       if (!test_and_set_thread_flag(TIF_NOCPUID)) {
+               /*
+                * Must flip the CPU state synchronously with
+                * TIF_NOCPUID in the current running context.
+                */
+               set_cpuid_faulting(true);
        }
+       preempt_enable();
+}
 
-       if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
-           test_tsk_thread_flag(next_p, TIF_NOTSC)) {
-               /* prev and next are different */
-               if (test_tsk_thread_flag(next_p, TIF_NOTSC))
-                       hard_disable_TSC();
-               else
-                       hard_enable_TSC();
+static void enable_cpuid(void)
+{
+       preempt_disable();
+       if (test_and_clear_thread_flag(TIF_NOCPUID)) {
+               /*
+                * Must flip the CPU state synchronously with
+                * TIF_NOCPUID in the current running context.
+                */
+               set_cpuid_faulting(false);
        }
+       preempt_enable();
+}
+
+static int get_cpuid_mode(void)
+{
+       return !test_thread_flag(TIF_NOCPUID);
+}
+
+static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
+{
+       if (!static_cpu_has(X86_FEATURE_CPUID_FAULT))
+               return -ENODEV;
+
+       if (cpuid_enabled)
+               enable_cpuid();
+       else
+               disable_cpuid();
+
+       return 0;
+}
+
+/*
+ * Called immediately after a successful exec.
+ */
+void arch_setup_new_exec(void)
+{
+       /* If cpuid was previously disabled for this task, re-enable it. */
+       if (test_thread_flag(TIF_NOCPUID))
+               enable_cpuid();
+}
 
-       if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
+static inline void switch_to_bitmap(struct tss_struct *tss,
+                                   struct thread_struct *prev,
+                                   struct thread_struct *next,
+                                   unsigned long tifp, unsigned long tifn)
+{
+       if (tifn & _TIF_IO_BITMAP) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                       max(prev->io_bitmap_max, next->io_bitmap_max));
-
                /*
                 * Make sure that the TSS limit is correct for the CPU
                 * to notice the IO bitmap.
                 */
                refresh_tss_limit();
-       } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
+       } else if (tifp & _TIF_IO_BITMAP) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }
+}
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+                     struct tss_struct *tss)
+{
+       struct thread_struct *prev, *next;
+       unsigned long tifp, tifn;
+
+       prev = &prev_p->thread;
+       next = &next_p->thread;
+
+       tifn = READ_ONCE(task_thread_info(next_p)->flags);
+       tifp = READ_ONCE(task_thread_info(prev_p)->flags);
+       switch_to_bitmap(tss, prev, next, tifp, tifn);
+
        propagate_user_return_notify(prev_p, next_p);
+
+       if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
+           arch_has_block_step()) {
+               unsigned long debugctl, msk;
+
+               rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+               debugctl &= ~DEBUGCTLMSR_BTF;
+               msk = tifn & _TIF_BLOCKSTEP;
+               debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
+               wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+       }
+
+       if ((tifp ^ tifn) & _TIF_NOTSC)
+               cr4_toggle_bits(X86_CR4_TSD);
+
+       if ((tifp ^ tifn) & _TIF_NOCPUID)
+               set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
 }
 
 /*
@@ -550,3 +616,16 @@ out:
        put_task_stack(p);
        return ret;
 }
+
+long do_arch_prctl_common(struct task_struct *task, int option,
+                         unsigned long cpuid_enabled)
+{
+       switch (option) {
+       case ARCH_GET_CPUID:
+               return get_cpuid_mode();
+       case ARCH_SET_CPUID:
+               return set_cpuid_mode(task, cpuid_enabled);
+       }
+
+       return -EINVAL;
+}
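
[Editorial aside, not part of the patch] The rewritten __switch_to_xtra() above propagates TIF_BLOCKSTEP into DEBUGCTLMSR.BTF without a branch by shifting the flag bit into the MSR bit position. A standalone sketch of that trick; the bit positions here are stand-ins assumed for illustration, not taken from the headers:

#include <stdio.h>

/* Stand-in constants, assumed for illustration only. */
#define TIF_BLOCKSTEP		25
#define _TIF_BLOCKSTEP		(1UL << TIF_BLOCKSTEP)
#define DEBUGCTLMSR_BTF_SHIFT	1
#define DEBUGCTLMSR_BTF		(1UL << DEBUGCTLMSR_BTF_SHIFT)

int main(void)
{
	unsigned long tifn = _TIF_BLOCKSTEP;	/* next task wants block stepping */
	unsigned long debugctl = 0;
	unsigned long msk;

	/* Shift the thread-flag bit straight into the MSR bit, no branch. */
	msk = tifn & _TIF_BLOCKSTEP;
	debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;

	printf("BTF set: %d\n", !!(debugctl & DEBUGCTLMSR_BTF));
	return 0;
}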
index 4c818f8bc1352b46263b63abd111de3910e9811a..ff40e74c9181f0e009b51909a0e76ce25c1c2cf3 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/kdebug.h>
+#include <linux/syscalls.h>
 
 #include <asm/pgtable.h>
 #include <asm/ldt.h>
@@ -56,6 +57,7 @@
 #include <asm/switch_to.h>
 #include <asm/vm86.h>
 #include <asm/intel_rdt.h>
+#include <asm/proto.h>
 
 void __show_regs(struct pt_regs *regs, int all)
 {
@@ -304,3 +306,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
        return prev_p;
 }
+
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+       return do_arch_prctl_common(current, option, arg2);
+}
index d6b784a5520daf2938cd228daa2bf6bde74c421d..ea1a6180bf3999eed7e2aa27f44500cfb1dfa59f 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/ftrace.h>
+#include <linux/syscalls.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -204,7 +205,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
                                (struct user_desc __user *)tls, 0);
                else
 #endif
-                       err = do_arch_prctl(p, ARCH_SET_FS, tls);
+                       err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
                if (err)
                        goto out;
        }
@@ -547,70 +548,72 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
 }
 #endif
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 {
        int ret = 0;
        int doit = task == current;
        int cpu;
 
-       switch (code) {
+       switch (option) {
        case ARCH_SET_GS:
-               if (addr >= TASK_SIZE_MAX)
+               if (arg2 >= TASK_SIZE_MAX)
                        return -EPERM;
                cpu = get_cpu();
                task->thread.gsindex = 0;
-               task->thread.gsbase = addr;
+               task->thread.gsbase = arg2;
                if (doit) {
                        load_gs_index(0);
-                       ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
+                       ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
-               if (addr >= TASK_SIZE_MAX)
+               if (arg2 >= TASK_SIZE_MAX)
                        return -EPERM;
                cpu = get_cpu();
                task->thread.fsindex = 0;
-               task->thread.fsbase = addr;
+               task->thread.fsbase = arg2;
                if (doit) {
                        /* set the selector to 0 to not confuse __switch_to */
                        loadsegment(fs, 0);
-                       ret = wrmsrl_safe(MSR_FS_BASE, addr);
+                       ret = wrmsrl_safe(MSR_FS_BASE, arg2);
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
+
                if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fsbase;
-               ret = put_user(base, (unsigned long __user *)addr);
+               ret = put_user(base, (unsigned long __user *)arg2);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
+
                if (doit)
                        rdmsrl(MSR_KERNEL_GS_BASE, base);
                else
                        base = task->thread.gsbase;
-               ret = put_user(base, (unsigned long __user *)addr);
+               ret = put_user(base, (unsigned long __user *)arg2);
                break;
        }
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
 # ifdef CONFIG_X86_X32_ABI
        case ARCH_MAP_VDSO_X32:
-               return prctl_map_vdso(&vdso_image_x32, addr);
+               return prctl_map_vdso(&vdso_image_x32, arg2);
 # endif
 # if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
        case ARCH_MAP_VDSO_32:
-               return prctl_map_vdso(&vdso_image_32, addr);
+               return prctl_map_vdso(&vdso_image_32, arg2);
 # endif
        case ARCH_MAP_VDSO_64:
-               return prctl_map_vdso(&vdso_image_64, addr);
+               return prctl_map_vdso(&vdso_image_64, arg2);
 #endif
 
        default:
@@ -621,10 +624,23 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
        return ret;
 }
 
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+       long ret;
+
+       ret = do_arch_prctl_64(current, option, arg2);
+       if (ret == -EINVAL)
+               ret = do_arch_prctl_common(current, option, arg2);
+
+       return ret;
+}
+
+#ifdef CONFIG_IA32_EMULATION
+COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 {
-       return do_arch_prctl(current, code, addr);
+       return do_arch_prctl_common(current, option, arg2);
 }
+#endif
 
 unsigned long KSTK_ESP(struct task_struct *task)
 {
index 2364b23ea3e52c3f5f9901a66574337ae2e23a3f..f37d18124648fb9591779e50c878c4ffd51c51b1 100644 (file)
@@ -396,12 +396,12 @@ static int putreg(struct task_struct *child,
                if (value >= TASK_SIZE_MAX)
                        return -EIO;
                /*
-                * When changing the segment base, use do_arch_prctl
+                * When changing the segment base, use do_arch_prctl_64
                 * to set either thread.fs or thread.fsindex and the
                 * corresponding GDT slot.
                 */
                if (child->thread.fsbase != value)
-                       return do_arch_prctl(child, ARCH_SET_FS, value);
+                       return do_arch_prctl_64(child, ARCH_SET_FS, value);
                return 0;
        case offsetof(struct user_regs_struct,gs_base):
                /*
@@ -410,7 +410,7 @@ static int putreg(struct task_struct *child,
                if (value >= TASK_SIZE_MAX)
                        return -EIO;
                if (child->thread.gsbase != value)
-                       return do_arch_prctl(child, ARCH_SET_GS, value);
+                       return do_arch_prctl_64(child, ARCH_SET_GS, value);
                return 0;
 #endif
        }
@@ -869,7 +869,7 @@ long arch_ptrace(struct task_struct *child, long request,
                   Works just like arch_prctl, except that the arguments
                   are reversed. */
        case PTRACE_ARCH_PRCTL:
-               ret = do_arch_prctl(child, data, addr);
+               ret = do_arch_prctl_64(child, data, addr);
                break;
 #endif
 
index efde6cc50875184bd51e86ba08116a6095dd070b..a181ae76c71ce102f88f6a4ee9cc3233677c55e2 100644 (file)
@@ -876,6 +876,9 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
        u32 eax, ebx, ecx, edx;
 
+       if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0))
+               return 1;
+
        eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
        ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
        kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx);
index 35058c2c0eeabe0fd9dedd45999478d5cb61fabb..a6fd40aade7cbf9e798591382c2382267d3141fc 100644 (file)
@@ -205,4 +205,15 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
        return x86_stepping(best->eax);
 }
 
+static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT;
+}
+
+static inline bool cpuid_fault_enabled(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.msr_misc_features_enables &
+                 MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
+}
+
 #endif
index 45c7306c8780b23494f22d8a775f1486ea29eafc..6a2ea945d01f124c0ecd254bf29d047908bb8167 100644 (file)
@@ -3854,6 +3854,13 @@ static int em_sti(struct x86_emulate_ctxt *ctxt)
 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
 {
        u32 eax, ebx, ecx, edx;
+       u64 msr = 0;
+
+       ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
+       if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
+           ctxt->ops->cpl(ctxt)) {
+               return emulate_gp(ctxt, 0);
+       }
 
        eax = reg_read(ctxt, VCPU_REGS_RAX);
        ecx = reg_read(ctxt, VCPU_REGS_RCX);
index 047b17a26269610b9cc083899cafaa6ca236eb5b..bdcd4139eca9233bbd9e82615a1ed3c45c2ad060 100644 (file)
@@ -49,7 +49,7 @@ static void pic_unlock(struct kvm_pic *s)
        __releases(&s->lock)
 {
        bool wakeup = s->wakeup_needed;
-       struct kvm_vcpu *vcpu, *found = NULL;
+       struct kvm_vcpu *vcpu;
        int i;
 
        s->wakeup_needed = false;
@@ -59,16 +59,11 @@ static void pic_unlock(struct kvm_pic *s)
        if (wakeup) {
                kvm_for_each_vcpu(i, vcpu, s->kvm) {
                        if (kvm_apic_accept_pic_intr(vcpu)) {
-                               found = vcpu;
-                               break;
+                               kvm_make_request(KVM_REQ_EVENT, vcpu);
+                               kvm_vcpu_kick(vcpu);
+                               return;
                        }
                }
-
-               if (!found)
-                       return;
-
-               kvm_make_request(KVM_REQ_EVENT, found);
-               kvm_vcpu_kick(found);
        }
 }
 
@@ -239,7 +234,7 @@ static inline void pic_intack(struct kvm_kpic_state *s, int irq)
 int kvm_pic_read_irq(struct kvm *kvm)
 {
        int irq, irq2, intno;
-       struct kvm_pic *s = pic_irqchip(kvm);
+       struct kvm_pic *s = kvm->arch.vpic;
 
        s->output = 0;
 
@@ -273,7 +268,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
        return intno;
 }
 
-void kvm_pic_reset(struct kvm_kpic_state *s)
+static void kvm_pic_reset(struct kvm_kpic_state *s)
 {
        int irq, i;
        struct kvm_vcpu *vcpu;
@@ -422,19 +417,16 @@ static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1)
        return ret;
 }
 
-static u32 pic_ioport_read(void *opaque, u32 addr1)
+static u32 pic_ioport_read(void *opaque, u32 addr)
 {
        struct kvm_kpic_state *s = opaque;
-       unsigned int addr;
        int ret;
 
-       addr = addr1;
-       addr &= 1;
        if (s->poll) {
-               ret = pic_poll_read(s, addr1);
+               ret = pic_poll_read(s, addr);
                s->poll = 0;
        } else
-               if (addr == 0)
+               if ((addr & 1) == 0)
                        if (s->read_reg_select)
                                ret = s->isr;
                        else
@@ -456,76 +448,64 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1)
        return s->elcr;
 }
 
-static int picdev_in_range(gpa_t addr)
-{
-       switch (addr) {
-       case 0x20:
-       case 0x21:
-       case 0xa0:
-       case 0xa1:
-       case 0x4d0:
-       case 0x4d1:
-               return 1;
-       default:
-               return 0;
-       }
-}
-
 static int picdev_write(struct kvm_pic *s,
                         gpa_t addr, int len, const void *val)
 {
        unsigned char data = *(unsigned char *)val;
-       if (!picdev_in_range(addr))
-               return -EOPNOTSUPP;
 
        if (len != 1) {
                pr_pic_unimpl("non byte write\n");
                return 0;
        }
-       pic_lock(s);
        switch (addr) {
        case 0x20:
        case 0x21:
        case 0xa0:
        case 0xa1:
+               pic_lock(s);
                pic_ioport_write(&s->pics[addr >> 7], addr, data);
+               pic_unlock(s);
                break;
        case 0x4d0:
        case 0x4d1:
+               pic_lock(s);
                elcr_ioport_write(&s->pics[addr & 1], addr, data);
+               pic_unlock(s);
                break;
+       default:
+               return -EOPNOTSUPP;
        }
-       pic_unlock(s);
        return 0;
 }
 
 static int picdev_read(struct kvm_pic *s,
                       gpa_t addr, int len, void *val)
 {
-       unsigned char data = 0;
-       if (!picdev_in_range(addr))
-               return -EOPNOTSUPP;
+       unsigned char *data = (unsigned char *)val;
 
        if (len != 1) {
                memset(val, 0, len);
                pr_pic_unimpl("non byte read\n");
                return 0;
        }
-       pic_lock(s);
        switch (addr) {
        case 0x20:
        case 0x21:
        case 0xa0:
        case 0xa1:
-               data = pic_ioport_read(&s->pics[addr >> 7], addr);
+               pic_lock(s);
+               *data = pic_ioport_read(&s->pics[addr >> 7], addr);
+               pic_unlock(s);
                break;
        case 0x4d0:
        case 0x4d1:
-               data = elcr_ioport_read(&s->pics[addr & 1], addr);
+               pic_lock(s);
+               *data = elcr_ioport_read(&s->pics[addr & 1], addr);
+               pic_unlock(s);
                break;
+       default:
+               return -EOPNOTSUPP;
        }
-       *(unsigned char *)val = data;
-       pic_unlock(s);
        return 0;
 }
 
@@ -576,7 +556,7 @@ static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
  */
 static void pic_irq_request(struct kvm *kvm, int level)
 {
-       struct kvm_pic *s = pic_irqchip(kvm);
+       struct kvm_pic *s = kvm->arch.vpic;
 
        if (!s->output)
                s->wakeup_needed = true;
@@ -660,9 +640,11 @@ void kvm_pic_destroy(struct kvm *kvm)
        if (!vpic)
                return;
 
+       mutex_lock(&kvm->slots_lock);
        kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master);
        kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave);
        kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr);
+       mutex_unlock(&kvm->slots_lock);
 
        kvm->arch.vpic = NULL;
        kfree(vpic);
index 289270a6aecbb478ea14cc786c72fcfdf5058350..bdff437acbcb7ebc3307523edd848fb7db009c39 100644 (file)
@@ -266,11 +266,9 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
        spin_unlock(&ioapic->lock);
 }
 
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
 {
-       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-
-       if (!ioapic)
+       if (!ioapic_in_kernel(kvm))
                return;
        kvm_make_scan_ioapic_request(kvm);
 }
@@ -315,7 +313,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
                if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
                    && ioapic->irr & (1 << index))
                        ioapic_service(ioapic, index, false);
-               kvm_vcpu_request_scan_ioapic(ioapic->kvm);
+               kvm_make_scan_ioapic_request(ioapic->kvm);
                break;
        }
 }
@@ -624,10 +622,8 @@ int kvm_ioapic_init(struct kvm *kvm)
        if (ret < 0) {
                kvm->arch.vioapic = NULL;
                kfree(ioapic);
-               return ret;
        }
 
-       kvm_vcpu_request_scan_ioapic(kvm);
        return ret;
 }
 
@@ -639,36 +635,32 @@ void kvm_ioapic_destroy(struct kvm *kvm)
                return;
 
        cancel_delayed_work_sync(&ioapic->eoi_inject);
+       mutex_lock(&kvm->slots_lock);
        kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+       mutex_unlock(&kvm->slots_lock);
        kvm->arch.vioapic = NULL;
        kfree(ioapic);
 }
 
-int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 {
-       struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
-       if (!ioapic)
-               return -EINVAL;
+       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
 
        spin_lock(&ioapic->lock);
        memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
        state->irr &= ~ioapic->irr_delivered;
        spin_unlock(&ioapic->lock);
-       return 0;
 }
 
-int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 {
-       struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
-       if (!ioapic)
-               return -EINVAL;
+       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
 
        spin_lock(&ioapic->lock);
        memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
        ioapic->irr = 0;
        ioapic->irr_delivered = 0;
-       kvm_vcpu_request_scan_ioapic(kvm);
+       kvm_make_scan_ioapic_request(kvm);
        kvm_ioapic_inject_all(ioapic, state->irr);
        spin_unlock(&ioapic->lock);
-       return 0;
 }
index 1cc6e54436dbaa71e4a68943456b9beaceec00f6..29ce19732ccf8e2f2b22ff91a6991818936d63fd 100644 (file)
@@ -105,17 +105,13 @@ do {                                                                      \
 #define ASSERT(x) do { } while (0)
 #endif
 
-static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
-{
-       return kvm->arch.vioapic;
-}
-
 static inline int ioapic_in_kernel(struct kvm *kvm)
 {
-       int ret;
+       int mode = kvm->arch.irqchip_mode;
 
-       ret = (ioapic_irqchip(kvm) != NULL);
-       return ret;
+       /* Matches smp_wmb() when setting irqchip_mode */
+       smp_rmb();
+       return mode == KVM_IRQCHIP_KERNEL;
 }
 
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
@@ -132,8 +128,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
                             struct kvm_lapic_irq *irq,
                             struct dest_map *dest_map);
-int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
                           ulong *ioapic_handled_vectors);
 void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
index 60d91c9d160c5647e36a9a5253b72c96d468c3eb..5c24811e8b0bcac141dfd1e58dcc052b2fca69d7 100644 (file)
@@ -60,7 +60,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
                if (irqchip_split(v->kvm))
                        return pending_userspace_extint(v);
                else
-                       return pic_irqchip(v->kvm)->output;
+                       return v->kvm->arch.vpic->output;
        } else
                return 0;
 }
index 40d5b2cf60611ec871b64990b42a38579c269e8d..0edd22c3344c9e80fc3f8a609f9c102feb628b0a 100644 (file)
@@ -78,40 +78,42 @@ void kvm_pic_destroy(struct kvm *kvm);
 int kvm_pic_read_irq(struct kvm *kvm);
 void kvm_pic_update_irq(struct kvm_pic *s);
 
-static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
-{
-       return kvm->arch.vpic;
-}
-
 static inline int pic_in_kernel(struct kvm *kvm)
 {
-       int ret;
+       int mode = kvm->arch.irqchip_mode;
 
-       ret = (pic_irqchip(kvm) != NULL);
-       return ret;
+       /* Matches smp_wmb() when setting irqchip_mode */
+       smp_rmb();
+       return mode == KVM_IRQCHIP_KERNEL;
 }
 
 static inline int irqchip_split(struct kvm *kvm)
 {
-       return kvm->arch.irqchip_mode == KVM_IRQCHIP_SPLIT;
+       int mode = kvm->arch.irqchip_mode;
+
+       /* Matches smp_wmb() when setting irqchip_mode */
+       smp_rmb();
+       return mode == KVM_IRQCHIP_SPLIT;
 }
 
 static inline int irqchip_kernel(struct kvm *kvm)
 {
-       return kvm->arch.irqchip_mode == KVM_IRQCHIP_KERNEL;
+       int mode = kvm->arch.irqchip_mode;
+
+       /* Matches smp_wmb() when setting irqchip_mode */
+       smp_rmb();
+       return mode == KVM_IRQCHIP_KERNEL;
 }
 
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
-       bool ret = kvm->arch.irqchip_mode != KVM_IRQCHIP_NONE;
+       int mode = kvm->arch.irqchip_mode;
 
-       /* Matches with wmb after initializing kvm->irq_routing. */
+       /* Matches smp_wmb() when setting irqchip_mode */
        smp_rmb();
-       return ret;
+       return mode > KVM_IRQCHIP_INIT_IN_PROGRESS;
 }
 
-void kvm_pic_reset(struct kvm_kpic_state *s);
-
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
index 6825cd36d13b7c2af164ca9c09e51cbf58109040..4517a4c2ac3a1edd791dbf44c66b3b360aadc327 100644 (file)
@@ -42,7 +42,7 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
                           struct kvm *kvm, int irq_source_id, int level,
                           bool line_status)
 {
-       struct kvm_pic *pic = pic_irqchip(kvm);
+       struct kvm_pic *pic = kvm->arch.vpic;
        return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
 }
 
@@ -232,11 +232,11 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
                goto unlock;
        }
        clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
-       if (!ioapic_in_kernel(kvm))
+       if (!irqchip_kernel(kvm))
                goto unlock;
 
        kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
-       kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id);
+       kvm_pic_clear_all(kvm->arch.vpic, irq_source_id);
 unlock:
        mutex_unlock(&kvm->irq_lock);
 }
@@ -278,38 +278,35 @@ int kvm_set_routing_entry(struct kvm *kvm,
                          struct kvm_kernel_irq_routing_entry *e,
                          const struct kvm_irq_routing_entry *ue)
 {
-       int r = -EINVAL;
-       int delta;
-       unsigned max_pin;
+       /* also allow creation of routes during KVM_IRQCHIP_INIT_IN_PROGRESS */
+       if (kvm->arch.irqchip_mode == KVM_IRQCHIP_NONE)
+               return -EINVAL;
 
+       /* Matches smp_wmb() when setting irqchip_mode */
+       smp_rmb();
        switch (ue->type) {
        case KVM_IRQ_ROUTING_IRQCHIP:
-               delta = 0;
+               if (irqchip_split(kvm))
+                       return -EINVAL;
+               e->irqchip.pin = ue->u.irqchip.pin;
                switch (ue->u.irqchip.irqchip) {
                case KVM_IRQCHIP_PIC_SLAVE:
-                       delta = 8;
+                       e->irqchip.pin += PIC_NUM_PINS / 2;
                        /* fall through */
                case KVM_IRQCHIP_PIC_MASTER:
-                       if (!pic_in_kernel(kvm))
-                               goto out;
-
+                       if (ue->u.irqchip.pin >= PIC_NUM_PINS / 2)
+                               return -EINVAL;
                        e->set = kvm_set_pic_irq;
-                       max_pin = PIC_NUM_PINS;
                        break;
                case KVM_IRQCHIP_IOAPIC:
-                       if (!ioapic_in_kernel(kvm))
-                               goto out;
-
-                       max_pin = KVM_IOAPIC_NUM_PINS;
+                       if (ue->u.irqchip.pin >= KVM_IOAPIC_NUM_PINS)
+                               return -EINVAL;
                        e->set = kvm_set_ioapic_irq;
                        break;
                default:
-                       goto out;
+                       return -EINVAL;
                }
                e->irqchip.irqchip = ue->u.irqchip.irqchip;
-               e->irqchip.pin = ue->u.irqchip.pin + delta;
-               if (e->irqchip.pin >= max_pin)
-                       goto out;
                break;
        case KVM_IRQ_ROUTING_MSI:
                e->set = kvm_set_msi;
@@ -318,7 +315,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
                e->msi.data = ue->u.msi.data;
 
                if (kvm_msi_route_invalid(kvm, e))
-                       goto out;
+                       return -EINVAL;
                break;
        case KVM_IRQ_ROUTING_HV_SINT:
                e->set = kvm_hv_set_sint;
@@ -326,12 +323,10 @@ int kvm_set_routing_entry(struct kvm *kvm,
                e->hv_sint.sint = ue->u.hv_sint.sint;
                break;
        default:
-               goto out;
+               return -EINVAL;
        }
 
-       r = 0;
-out:
-       return r;
+       return 0;
 }
 
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
index 1b203abf76e1e60a75819be391e44e3cb7e0a28e..c41f03e5090ada662a2fcf3b5dfd8a4f700233d1 100644 (file)
@@ -1198,10 +1198,13 @@ static void init_vmcb(struct vcpu_svm *svm)
        set_intercept(svm, INTERCEPT_CLGI);
        set_intercept(svm, INTERCEPT_SKINIT);
        set_intercept(svm, INTERCEPT_WBINVD);
-       set_intercept(svm, INTERCEPT_MONITOR);
-       set_intercept(svm, INTERCEPT_MWAIT);
        set_intercept(svm, INTERCEPT_XSETBV);
 
+       if (!kvm_mwait_in_guest()) {
+               set_intercept(svm, INTERCEPT_MONITOR);
+               set_intercept(svm, INTERCEPT_MWAIT);
+       }
+
        control->iopm_base_pa = iopm_base;
        control->msrpm_base_pa = __pa(svm->msrpm);
        control->int_ctl = V_INTR_MASKING_MASK;
index cfdb0d9389d1f611050c0cfffa6143d383ac4657..a4ef6371810106bef15aa649badef4ff9d8ac18d 100644 (file)
@@ -84,9 +84,6 @@ module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
 static bool __read_mostly emulate_invalid_guest_state = true;
 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 
-static bool __read_mostly vmm_exclusive = 1;
-module_param(vmm_exclusive, bool, S_IRUGO);
-
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
@@ -910,8 +907,6 @@ static void nested_release_page_clean(struct page *page)
 
 static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
 static u64 construct_eptp(unsigned long root_hpa);
-static void kvm_cpu_vmxon(u64 addr);
-static void kvm_cpu_vmxoff(void);
 static bool vmx_xsaves_supported(void);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@ -2231,15 +2226,10 @@ static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
 static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
        bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
 
-       if (!vmm_exclusive)
-               kvm_cpu_vmxon(phys_addr);
-       else if (!already_loaded)
-               loaded_vmcs_clear(vmx->loaded_vmcs);
-
        if (!already_loaded) {
+               loaded_vmcs_clear(vmx->loaded_vmcs);
                local_irq_disable();
                crash_disable_local_vmclear(cpu);
 
@@ -2317,11 +2307,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
        vmx_vcpu_pi_put(vcpu);
 
        __vmx_load_host_state(to_vmx(vcpu));
-       if (!vmm_exclusive) {
-               __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
-               vcpu->cpu = -1;
-               kvm_cpu_vmxoff();
-       }
 }
 
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
@@ -3416,6 +3401,7 @@ static __init int vmx_disabled_by_bios(void)
 
 static void kvm_cpu_vmxon(u64 addr)
 {
+       cr4_set_bits(X86_CR4_VMXE);
        intel_pt_handle_vmx(1);
 
        asm volatile (ASM_VMX_VMXON_RAX
@@ -3458,12 +3444,8 @@ static int hardware_enable(void)
                /* enable and lock */
                wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
        }
-       cr4_set_bits(X86_CR4_VMXE);
-
-       if (vmm_exclusive) {
-               kvm_cpu_vmxon(phys_addr);
-               ept_sync_global();
-       }
+       kvm_cpu_vmxon(phys_addr);
+       ept_sync_global();
 
        native_store_gdt(this_cpu_ptr(&host_gdt));
 
@@ -3489,15 +3471,13 @@ static void kvm_cpu_vmxoff(void)
        asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
 
        intel_pt_handle_vmx(0);
+       cr4_clear_bits(X86_CR4_VMXE);
 }
 
 static void hardware_disable(void)
 {
-       if (vmm_exclusive) {
-               vmclear_local_loaded_vmcss();
-               kvm_cpu_vmxoff();
-       }
-       cr4_clear_bits(X86_CR4_VMXE);
+       vmclear_local_loaded_vmcss();
+       kvm_cpu_vmxoff();
 }
 
 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -3547,11 +3527,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
              CPU_BASED_USE_IO_BITMAPS |
              CPU_BASED_MOV_DR_EXITING |
              CPU_BASED_USE_TSC_OFFSETING |
-             CPU_BASED_MWAIT_EXITING |
-             CPU_BASED_MONITOR_EXITING |
              CPU_BASED_INVLPG_EXITING |
              CPU_BASED_RDPMC_EXITING;
 
+       if (!kvm_mwait_in_guest())
+               min |= CPU_BASED_MWAIT_EXITING |
+                       CPU_BASED_MONITOR_EXITING;
+
        opt = CPU_BASED_TPR_SHADOW |
              CPU_BASED_USE_MSR_BITMAPS |
              CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -6221,7 +6203,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
                 * page table accesses are reads or writes.
                 */
                u64 eptp = nested_ept_get_cr3(vcpu);
-               if (eptp & VMX_EPT_AD_ENABLE_BIT)
+               if (!(eptp & VMX_EPT_AD_ENABLE_BIT))
                        exit_qualification &= ~EPT_VIOLATION_ACC_WRITE;
        }
 
@@ -9170,11 +9152,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
        vmx->loaded_vmcs->shadow_vmcs = NULL;
        if (!vmx->loaded_vmcs->vmcs)
                goto free_msrs;
-       if (!vmm_exclusive)
-               kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
        loaded_vmcs_init(vmx->loaded_vmcs);
-       if (!vmm_exclusive)
-               kvm_cpu_vmxoff();
 
        cpu = get_cpu();
        vmx_vcpu_load(&vmx->vcpu, cpu);
index 6bc47e2712c87dc1a752a7fdca7dea9afe387daf..f68c5b2ba627daba09d7bff5ca8c97b4d4189b90 100644 (file)
@@ -1007,6 +1007,8 @@ static u32 emulated_msrs[] = {
        MSR_IA32_MCG_CTL,
        MSR_IA32_MCG_EXT_CTL,
        MSR_IA32_SMBASE,
+       MSR_PLATFORM_INFO,
+       MSR_MISC_FEATURES_ENABLES,
 };
 
 static unsigned num_emulated_msrs;
@@ -1443,10 +1445,10 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
        struct kvm *kvm = vcpu->kvm;
        u64 offset, ns, elapsed;
        unsigned long flags;
-       s64 usdiff;
        bool matched;
        bool already_matched;
        u64 data = msr->data;
+       bool synchronizing = false;
 
        raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
        offset = kvm_compute_tsc_offset(vcpu, data);
@@ -1454,51 +1456,34 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
        elapsed = ns - kvm->arch.last_tsc_nsec;
 
        if (vcpu->arch.virtual_tsc_khz) {
-               int faulted = 0;
-
-               /* n.b - signed multiplication and division required */
-               usdiff = data - kvm->arch.last_tsc_write;
-#ifdef CONFIG_X86_64
-               usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
-#else
-               /* do_div() only does unsigned */
-               asm("1: idivl %[divisor]\n"
-                   "2: xor %%edx, %%edx\n"
-                   "   movl $0, %[faulted]\n"
-                   "3:\n"
-                   ".section .fixup,\"ax\"\n"
-                   "4: movl $1, %[faulted]\n"
-                   "   jmp  3b\n"
-                   ".previous\n"
-
-               _ASM_EXTABLE(1b, 4b)
-
-               : "=A"(usdiff), [faulted] "=r" (faulted)
-               : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));
-
-#endif
-               do_div(elapsed, 1000);
-               usdiff -= elapsed;
-               if (usdiff < 0)
-                       usdiff = -usdiff;
-
-               /* idivl overflow => difference is larger than USEC_PER_SEC */
-               if (faulted)
-                       usdiff = USEC_PER_SEC;
-       } else
-               usdiff = USEC_PER_SEC; /* disable TSC match window below */
+               if (data == 0 && msr->host_initiated) {
+                       /*
+                        * detection of vcpu initialization -- need to sync
+                        * with other vCPUs. This particularly helps to keep
+                        * kvm_clock stable after CPU hotplug
+                        */
+                       synchronizing = true;
+               } else {
+                       u64 tsc_exp = kvm->arch.last_tsc_write +
+                                               nsec_to_cycles(vcpu, elapsed);
+                       u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
+                       /*
+                        * Special case: TSC write with a small delta (1 second)
+                        * of virtual cycle time against real time is
+                        * interpreted as an attempt to synchronize the CPU.
+                        */
+                       synchronizing = data < tsc_exp + tsc_hz &&
+                                       data + tsc_hz > tsc_exp;
+               }
+       }
 
        /*
-        * Special case: TSC write with a small delta (1 second) of virtual
-        * cycle time against real time is interpreted as an attempt to
-        * synchronize the CPU.
-         *
         * For a reliable TSC, we can match TSC offsets, and for an unstable
         * TSC, we add elapsed time in this computation.  We could let the
         * compensation code attempt to catch up if we fall behind, but
         * it's better to try to match offsets from the beginning.
          */
-       if (usdiff < USEC_PER_SEC &&
+       if (synchronizing &&
            vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
                if (!check_tsc_unstable()) {
                        offset = kvm->arch.cur_tsc_offset;
@@ -1774,7 +1759,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 #endif
 }
 
-static u64 __get_kvmclock_ns(struct kvm *kvm)
+u64 get_kvmclock_ns(struct kvm *kvm)
 {
        struct kvm_arch *ka = &kvm->arch;
        struct pvclock_vcpu_time_info hv_clock;
@@ -1795,18 +1780,6 @@ static u64 __get_kvmclock_ns(struct kvm *kvm)
        return __pvclock_read_cycles(&hv_clock, rdtsc());
 }
 
-u64 get_kvmclock_ns(struct kvm *kvm)
-{
-       unsigned long flags;
-       s64 ns;
-
-       local_irq_save(flags);
-       ns = __get_kvmclock_ns(kvm);
-       local_irq_restore(flags);
-
-       return ns;
-}
-
 static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
 {
        struct kvm_vcpu_arch *vcpu = &v->arch;
@@ -2154,6 +2127,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_VM_HSAVE_PA:
        case MSR_AMD64_PATCH_LOADER:
        case MSR_AMD64_BU_CFG2:
+       case MSR_AMD64_DC_CFG:
                break;
 
        case MSR_EFER:
@@ -2330,6 +2304,21 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        return 1;
                vcpu->arch.osvw.status = data;
                break;
+       case MSR_PLATFORM_INFO:
+               if (!msr_info->host_initiated ||
+                   data & ~MSR_PLATFORM_INFO_CPUID_FAULT ||
+                   (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
+                    cpuid_fault_enabled(vcpu)))
+                       return 1;
+               vcpu->arch.msr_platform_info = data;
+               break;
+       case MSR_MISC_FEATURES_ENABLES:
+               if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
+                   (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
+                    !supports_cpuid_fault(vcpu)))
+                       return 1;
+               vcpu->arch.msr_misc_features_enables = data;
+               break;
        default:
                if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
                        return xen_hvm_config(vcpu, data);
@@ -2416,6 +2405,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_FAM10H_MMIO_CONF_BASE:
        case MSR_AMD64_BU_CFG2:
        case MSR_IA32_PERF_CTL:
+       case MSR_AMD64_DC_CFG:
                msr_info->data = 0;
                break;
        case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
@@ -2544,6 +2534,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        return 1;
                msr_info->data = vcpu->arch.osvw.status;
                break;
+       case MSR_PLATFORM_INFO:
+               msr_info->data = vcpu->arch.msr_platform_info;
+               break;
+       case MSR_MISC_FEATURES_ENABLES:
+               msr_info->data = vcpu->arch.msr_misc_features_enables;
+               break;
        default:
                if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
                        return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
@@ -2679,6 +2675,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_ADJUST_CLOCK:
                r = KVM_CLOCK_TSC_STABLE;
                break;
+       case KVM_CAP_X86_GUEST_MWAIT:
+               r = kvm_mwait_in_guest();
+               break;
        case KVM_CAP_X86_SMM:
                /* SMBASE is usually relocated above 1M on modern chipsets,
                 * and SMM handlers might indeed rely on 4G segment limits,
@@ -3715,22 +3714,21 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
 
 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 {
+       struct kvm_pic *pic = kvm->arch.vpic;
        int r;
 
        r = 0;
        switch (chip->chip_id) {
        case KVM_IRQCHIP_PIC_MASTER:
-               memcpy(&chip->chip.pic,
-                       &pic_irqchip(kvm)->pics[0],
+               memcpy(&chip->chip.pic, &pic->pics[0],
                        sizeof(struct kvm_pic_state));
                break;
        case KVM_IRQCHIP_PIC_SLAVE:
-               memcpy(&chip->chip.pic,
-                       &pic_irqchip(kvm)->pics[1],
+               memcpy(&chip->chip.pic, &pic->pics[1],
                        sizeof(struct kvm_pic_state));
                break;
        case KVM_IRQCHIP_IOAPIC:
-               r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
+               kvm_get_ioapic(kvm, &chip->chip.ioapic);
                break;
        default:
                r = -EINVAL;
@@ -3741,32 +3739,31 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 
 static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 {
+       struct kvm_pic *pic = kvm->arch.vpic;
        int r;
 
        r = 0;
        switch (chip->chip_id) {
        case KVM_IRQCHIP_PIC_MASTER:
-               spin_lock(&pic_irqchip(kvm)->lock);
-               memcpy(&pic_irqchip(kvm)->pics[0],
-                       &chip->chip.pic,
+               spin_lock(&pic->lock);
+               memcpy(&pic->pics[0], &chip->chip.pic,
                        sizeof(struct kvm_pic_state));
-               spin_unlock(&pic_irqchip(kvm)->lock);
+               spin_unlock(&pic->lock);
                break;
        case KVM_IRQCHIP_PIC_SLAVE:
-               spin_lock(&pic_irqchip(kvm)->lock);
-               memcpy(&pic_irqchip(kvm)->pics[1],
-                       &chip->chip.pic,
+               spin_lock(&pic->lock);
+               memcpy(&pic->pics[1], &chip->chip.pic,
                        sizeof(struct kvm_pic_state));
-               spin_unlock(&pic_irqchip(kvm)->lock);
+               spin_unlock(&pic->lock);
                break;
        case KVM_IRQCHIP_IOAPIC:
-               r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
+               kvm_set_ioapic(kvm, &chip->chip.ioapic);
                break;
        default:
                r = -EINVAL;
                break;
        }
-       kvm_pic_update_irq(pic_irqchip(kvm));
+       kvm_pic_update_irq(pic);
        return r;
 }
 
@@ -3928,9 +3925,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
                        goto split_irqchip_unlock;
                if (kvm->created_vcpus)
                        goto split_irqchip_unlock;
+               kvm->arch.irqchip_mode = KVM_IRQCHIP_INIT_IN_PROGRESS;
                r = kvm_setup_empty_irq_routing(kvm);
-               if (r)
+               if (r) {
+                       kvm->arch.irqchip_mode = KVM_IRQCHIP_NONE;
+                       /* Pairs with smp_rmb() when reading irqchip_mode */
+                       smp_wmb();
                        goto split_irqchip_unlock;
+               }
                /* Pairs with irqchip_in_kernel. */
                smp_wmb();
                kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
@@ -4012,20 +4014,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
                r = kvm_ioapic_init(kvm);
                if (r) {
-                       mutex_lock(&kvm->slots_lock);
                        kvm_pic_destroy(kvm);
-                       mutex_unlock(&kvm->slots_lock);
                        goto create_irqchip_unlock;
                }
 
+               kvm->arch.irqchip_mode = KVM_IRQCHIP_INIT_IN_PROGRESS;
                r = kvm_setup_default_irq_routing(kvm);
                if (r) {
-                       mutex_lock(&kvm->slots_lock);
-                       mutex_lock(&kvm->irq_lock);
+                       kvm->arch.irqchip_mode = KVM_IRQCHIP_NONE;
+                       /* Pairs with smp_rmb() when reading irqchip_mode */
+                       smp_wmb();
                        kvm_ioapic_destroy(kvm);
                        kvm_pic_destroy(kvm);
-                       mutex_unlock(&kvm->irq_lock);
-                       mutex_unlock(&kvm->slots_lock);
                        goto create_irqchip_unlock;
                }
                /* Write kvm->irq_routing before enabling irqchip_in_kernel. */
@@ -4190,10 +4190,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        goto out;
 
                r = 0;
-               local_irq_disable();
-               now_ns = __get_kvmclock_ns(kvm);
+               now_ns = get_kvmclock_ns(kvm);
                kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
-               local_irq_enable();
                kvm_gen_update_masterclock(kvm);
                break;
        }
@@ -4201,11 +4199,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
                struct kvm_clock_data user_ns;
                u64 now_ns;
 
-               local_irq_disable();
-               now_ns = __get_kvmclock_ns(kvm);
+               now_ns = get_kvmclock_ns(kvm);
                user_ns.clock = now_ns;
                user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
-               local_irq_enable();
                memset(&user_ns.pad, 0, sizeof(user_ns.pad));
 
                r = -EFAULT;
@@ -7724,6 +7720,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        if (!init_event) {
                kvm_pmu_reset(vcpu);
                vcpu->arch.smbase = 0x30000;
+
+               vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
+               vcpu->arch.msr_misc_features_enables = 0;
        }
 
        memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
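
[Editorial aside, not part of the patch] Since MSR_PLATFORM_INFO defaults to advertising CPUID faulting at vCPU reset (see the hunk above), a VMM could turn the fault on for a guest with an ordinary KVM_SET_MSRS call. A hedged sketch, assuming an already-created vcpu_fd and that the host kernel carries this patch; the MSR index is copied from the msr-index.h hunk earlier in this diff:

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#ifndef MSR_MISC_FEATURES_ENABLES
#define MSR_MISC_FEATURES_ENABLES 0x00000140	/* value from the hunk above */
#endif

/* Returns 1 (number of MSRs written) on success, negative on error. */
static int enable_guest_cpuid_fault(int vcpu_fd)
{
	struct kvm_msrs *msrs;
	int ret;

	msrs = calloc(1, sizeof(*msrs) + sizeof(struct kvm_msr_entry));
	if (!msrs)
		return -1;

	msrs->nmsrs = 1;
	msrs->entries[0].index = MSR_MISC_FEATURES_ENABLES;
	msrs->entries[0].data = 1ULL << 0;	/* CPUID_FAULT bit */

	ret = ioctl(vcpu_fd, KVM_SET_MSRS, msrs);
	free(msrs);
	return ret;
}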
index e8ff3e4ce38a53e167bc475f0d2b91b5fd9065b3..6120670749055ddb43174c606d3d5eceaf41ecd6 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef ARCH_X86_KVM_X86_H
 #define ARCH_X86_KVM_X86_H
 
+#include <asm/processor.h>
+#include <asm/mwait.h>
 #include <linux/kvm_host.h>
 #include <asm/pvclock.h>
 #include "kvm_cache_regs.h"
@@ -212,4 +214,38 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
            __rem;                                              \
         })
 
+static inline bool kvm_mwait_in_guest(void)
+{
+       unsigned int eax, ebx, ecx, edx;
+
+       if (!cpu_has(&boot_cpu_data, X86_FEATURE_MWAIT))
+               return false;
+
+       switch (boot_cpu_data.x86_vendor) {
+       case X86_VENDOR_AMD:
+               /* All AMD CPUs have a working MWAIT implementation */
+               return true;
+       case X86_VENDOR_INTEL:
+               /* Handle Intel below */
+               break;
+       default:
+               return false;
+       }
+
+       /*
+        * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are problematic as
+        * they would allow guest to stop the CPU completely by disabling
+        * interrupts then invoking MWAIT.
+        */
+       if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+               return false;
+
+       cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
+
+       if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
+               return false;
+
+       return true;
+}
+
 #endif
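
[Editorial aside, not part of the patch] The capability backed by kvm_mwait_in_guest() above can be probed from a VMM with the standard KVM_CHECK_EXTENSION ioctl before relying on MWAIT/MONITOR staying inside the guest. A sketch, assuming uapi headers that already define KVM_CAP_X86_GUEST_MWAIT:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);

	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}
	/* Non-zero means MWAIT/MONITOR in the guest will not cause VM exits. */
	int r = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_X86_GUEST_MWAIT);

	printf("KVM_CAP_X86_GUEST_MWAIT: %d\n", r);
	return 0;
}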
index e7e7055a86589dea6d0f9ca043365db4ab8b1b9c..69f0827d5f5391e1ad3b4ff970aa63504c17b434 100644 (file)
@@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ldt.o \
 
 ifeq ($(CONFIG_X86_32),y)
 
-obj-y += checksum_32.o
+obj-y += checksum_32.o syscalls_32.o
 obj-$(CONFIG_ELF_CORE) += elfcore.o
 
 subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
index e59eef20647b3e722cc3f5a97fdec08c29071c14..b291ca5cf66bfc83e8176d0d6c05fa82a49ad823 100644 (file)
@@ -78,7 +78,7 @@ static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
         return -ENOSYS;
 }
 
-extern long arch_prctl(struct task_struct *task, int code,
+extern long arch_prctl(struct task_struct *task, int option,
                       unsigned long __user *addr);
 
 #endif
index 96eb2bd288320b28e1ff6278bd0d410f1536ceab..8431e87ac33338eed5d037fe07259558c9b5ee23 100644 (file)
@@ -6,7 +6,7 @@
 #include <sys/ptrace.h>
 #include <asm/ptrace.h>
 
-int os_arch_prctl(int pid, int code, unsigned long *addr)
+int os_arch_prctl(int pid, int option, unsigned long *arg2)
 {
-        return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code);
+       return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) arg2, option);
 }
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
new file mode 100644 (file)
index 0000000..627d688
--- /dev/null
@@ -0,0 +1,7 @@
+#include <linux/syscalls.h>
+#include <os.h>
+
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+       return -EINVAL;
+}
index 10d907098c2614835b002cbdc8d754cca1f7d56a..58f51667e2e4beb82ec794cf618bf7026cfcb66c 100644 (file)
@@ -7,13 +7,15 @@
 
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
+#include <linux/syscalls.h>
 #include <linux/uaccess.h>
 #include <asm/prctl.h> /* XXX This should get the constants from libc */
 #include <os.h>
 
-long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
+long arch_prctl(struct task_struct *task, int option,
+               unsigned long __user *arg2)
 {
-       unsigned long *ptr = addr, tmp;
+       unsigned long *ptr = arg2, tmp;
        long ret;
        int pid = task->mm->context.id.u.pid;
 
@@ -30,7 +32,7 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
         * arch_prctl is run on the host, then the registers are read
         * back.
         */
-       switch (code) {
+       switch (option) {
        case ARCH_SET_FS:
        case ARCH_SET_GS:
                ret = restore_registers(pid, &current->thread.regs.regs);
@@ -50,11 +52,11 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
                ptr = &tmp;
        }
 
-       ret = os_arch_prctl(pid, code, ptr);
+       ret = os_arch_prctl(pid, option, ptr);
        if (ret)
                return ret;
 
-       switch (code) {
+       switch (option) {
        case ARCH_SET_FS:
                current->thread.arch.fs = (unsigned long) ptr;
                ret = save_registers(pid, &current->thread.regs.regs);
@@ -63,19 +65,19 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
                ret = save_registers(pid, &current->thread.regs.regs);
                break;
        case ARCH_GET_FS:
-               ret = put_user(tmp, addr);
+               ret = put_user(tmp, arg2);
                break;
        case ARCH_GET_GS:
-               ret = put_user(tmp, addr);
+               ret = put_user(tmp, arg2);
                break;
        }
 
        return ret;
 }
 
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 {
-       return arch_prctl(current, code, (unsigned long __user *) addr);
+       return arch_prctl(current, option, (unsigned long __user *) arg2);
 }
 
 void arch_switch_to(struct task_struct *to)
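
Switching the UML arch_prctl entry points to SYSCALL_DEFINE2() (with a 32-bit stub that simply returns -EINVAL) does not change the user-visible ABI; only the parameter names move from code/addr to option/arg2 to match x86. For reference, a 64-bit caller exercises the syscall roughly as follows; this generic sketch assumes no libc wrapper and uses the raw syscall number:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/prctl.h>          /* ARCH_GET_FS, ARCH_SET_FS, ... */

int main(void)
{
        unsigned long fs = 0;

        /* option = ARCH_GET_FS, arg2 = pointer the kernel writes through */
        if (syscall(SYS_arch_prctl, ARCH_GET_FS, &fs))
                perror("arch_prctl(ARCH_GET_FS)");
        else
                printf("FS base: %#lx\n", fs);
        return 0;
}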
index cf3de91fbfe7a522456d00bff9005df9a3662ed4..8031d3a55a177211a23ba202272359781d2cb574 100644 (file)
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -680,7 +680,7 @@ static void tce_iommu_free_table(struct tce_container *container,
        unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
 
        tce_iommu_userspace_view_free(tbl, container->mm);
-       tbl->it_ops->free(tbl);
+       iommu_tce_table_put(tbl);
        decrement_locked_vm(container->mm, pages);
 }
 
index 65145a3df065192345c66ebc311d464fe09a6f29..72934df6847150ba50dfbadad78fe10e01d2eadd 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1320,6 +1320,7 @@ void setup_new_exec(struct linux_binprm * bprm)
        else
                set_dumpable(current->mm, suid_dumpable);
 
+       arch_setup_new_exec();
        perf_event_exec();
        __set_task_comm(current, kbasename(bprm->filename), true);
 
index aef47be2a5c1a3fd3ea75161f5bc93627772515c..af9dbc44fd9212f42b1fd07212f8dda7d059ed53 100644 (file)
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -723,6 +723,8 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
 asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
                                            int, const char __user *);
 
+asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2);
+
 /*
  * For most but not all architectures, "am I in a compat syscall?" and
  * "am I a compat task?" are the same question.  For architectures on which
index 7e74ae4d99bbef5ccb2b0bacc3f80542427c3427..397b7b5b1933b8082ce2955ffdc52463aafdcbf3 100644 (file)
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -502,10 +502,10 @@ int __must_check vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
 
 #ifdef __KVM_HAVE_IOAPIC
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
+void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm);
 void kvm_arch_post_irq_routing_update(struct kvm *kvm);
 #else
-static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
 {
 }
 static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
index 58373875e8eec2aee668e5fdac4526796ebe78c8..55125d67433851df5fffba9f641e907735bae772 100644 (file)
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -101,6 +101,10 @@ static inline void check_object_size(const void *ptr, unsigned long n,
 { }
 #endif /* CONFIG_HARDENED_USERCOPY */
 
+#ifndef arch_setup_new_exec
+static inline void arch_setup_new_exec(void) { }
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_THREAD_INFO_H */
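
arch_setup_new_exec() falls back to the empty inline above unless the architecture both defines the function and adds the matching #define in its own asm/thread_info.h, so existing architectures are unaffected. A minimal sketch of how an architecture could hook execve() this way is shown below; the flag name and body are purely illustrative, not taken from this series:

/* arch/<arch>/include/asm/thread_info.h (illustrative) */
#define arch_setup_new_exec arch_setup_new_exec
void arch_setup_new_exec(void);

/* arch/<arch>/kernel/process.c (illustrative) */
void arch_setup_new_exec(void)
{
        /* Drop per-task execution state that must not leak across execve(),
         * e.g. a hypothetical "CPUID disabled" flag set via arch_prctl(). */
        clear_thread_flag(TIF_NOCPUID_EXAMPLE);
}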
index 6180ea50e9ef01c62d1817bcb8399a29da3973e5..e43906b95d9f64fe9ab84117dd52666d520764dd 100644 (file)
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -892,6 +892,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_MIPS_64BIT 139
 #define KVM_CAP_S390_GS 140
 #define KVM_CAP_S390_AIS 141
+#define KVM_CAP_SPAPR_TCE_VFIO 142
+#define KVM_CAP_X86_GUEST_MWAIT 143
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1096,6 +1098,7 @@ struct kvm_device_attr {
 #define  KVM_DEV_VFIO_GROUP                    1
 #define   KVM_DEV_VFIO_GROUP_ADD                       1
 #define   KVM_DEV_VFIO_GROUP_DEL                       2
+#define   KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE             3
 
 enum kvm_device_type {
        KVM_DEV_TYPE_FSL_MPIC_20        = 1,
@@ -1117,6 +1120,11 @@ enum kvm_device_type {
        KVM_DEV_TYPE_MAX,
 };
 
+struct kvm_vfio_spapr_tce {
+       __s32   groupfd;
+       __s32   tablefd;
+};
+
 /*
  * ioctls for VM fds
  */
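
With the attribute number and payload defined, userspace wires an in-kernel TCE table to a VFIO group by issuing KVM_SET_DEVICE_ATTR against the KVM VFIO device. A hedged sketch follows; it assumes the VFIO group fd, the TCE table fd (from KVM_CREATE_SPAPR_TCE) and the device fd (from KVM_CREATE_DEVICE with KVM_DEV_TYPE_VFIO) were obtained earlier:

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int attach_spapr_tce(int kvm_vfio_dev_fd, int vfio_group_fd,
                            int tce_table_fd)
{
        struct kvm_vfio_spapr_tce param = {
                .groupfd = vfio_group_fd,
                .tablefd = tce_table_fd,
        };
        struct kvm_device_attr attr = {
                .group = KVM_DEV_VFIO_GROUP,
                .attr  = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
                .addr  = (__u64)(unsigned long)&param,
        };

        /* 0 on success; -1/errno (e.g. ENOENT if the group was never added) */
        return ioctl(kvm_vfio_dev_fd, KVM_SET_DEVICE_ATTR, &attr);
}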
index 2c9082ba61376fd7fb5dd375ca2afac1bb44d53a..116b7735ee9f7aad88b748de929be5b9b83d2859 100755 (executable)
--- a/scripts/checksyscalls.sh
+++ b/scripts/checksyscalls.sh
@@ -148,6 +148,7 @@ cat << EOF
 #define __IGNORE_sysfs
 #define __IGNORE_uselib
 #define __IGNORE__sysctl
+#define __IGNORE_arch_prctl
 
 /* ... including the "new" 32-bit uid syscalls */
 #define __IGNORE_lchown32
index 4d28a9ddbee01077fea01beeeae5523917822da9..a8d540398bbd0350b8b969820d7a11d528cf672d 100644 (file)
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -490,7 +490,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
        mutex_lock(&kvm->irq_lock);
        hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
        mutex_unlock(&kvm->irq_lock);
-       kvm_vcpu_request_scan_ioapic(kvm);
+       kvm_arch_post_irq_ack_notifier_list_update(kvm);
 }
 
 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
@@ -500,7 +500,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
        hlist_del_init_rcu(&kian->link);
        mutex_unlock(&kvm->irq_lock);
        synchronize_srcu(&kvm->irq_srcu);
-       kvm_vcpu_request_scan_ioapic(kvm);
+       kvm_arch_post_irq_ack_notifier_list_update(kvm);
 }
 #endif
 
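
The rename makes it explicit that this is a generic arch hook run after the ack-notifier list changes, rather than an IOAPIC-specific request. On an architecture with an in-kernel IOAPIC the hook would plausibly look like the sketch below (illustrative, not necessarily the exact x86 code; it assumes helpers such as ioapic_in_kernel() and kvm_make_scan_ioapic_request() are in scope):

void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
{
        if (!ioapic_in_kernel(kvm))
                return;

        /* Have every vCPU rescan the IOAPIC so EOI interception stays in sync. */
        kvm_make_scan_ioapic_request(kvm);
}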
index 3bcc9990adf79eb9367a6fee61a7fd9683233a35..cc30d01a56bee9e93b02aa0b586b83288765b9e8 100644 (file)
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -142,8 +142,8 @@ static int setup_routing_entry(struct kvm *kvm,
                               struct kvm_kernel_irq_routing_entry *e,
                               const struct kvm_irq_routing_entry *ue)
 {
-       int r = -EINVAL;
        struct kvm_kernel_irq_routing_entry *ei;
+       int r;
 
        /*
         * Do not allow GSI to be mapped to the same irqchip more than once.
@@ -153,20 +153,19 @@ static int setup_routing_entry(struct kvm *kvm,
                if (ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
                    ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
                    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
-                       return r;
+                       return -EINVAL;
 
        e->gsi = ue->gsi;
        e->type = ue->type;
        r = kvm_set_routing_entry(kvm, e, ue);
        if (r)
-               goto out;
+               return r;
        if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
                rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi;
 
        hlist_add_head(&e->link, &rt->map[e->gsi]);
-       r = 0;
-out:
-       return r;
+
+       return 0;
 }
 
 void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm)
index f489167839c48b49aad6982bc3bee4dc858f56c2..357e67cba32eeddab73323356dbdc79d5736ae24 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3069,8 +3069,11 @@ static long kvm_vm_ioctl(struct file *filp,
                                           routing.nr * sizeof(*entries)))
                                goto out_free_irq_routing;
                }
+               /* avoid races with KVM_CREATE_IRQCHIP on x86 */
+               mutex_lock(&kvm->lock);
                r = kvm_set_irq_routing(kvm, entries, routing.nr,
                                        routing.flags);
+               mutex_unlock(&kvm->lock);
 out_free_irq_routing:
                vfree(entries);
                break;
index d32f239eb47133ae3f57ab0ed52a71d5c0478b8e..37d9118fd84be07bc119d40efe58b8edc1f2a889 100644 (file)
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
 #include <linux/vfio.h>
 #include "vfio.h"
 
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+#include <asm/kvm_ppc.h>
+#endif
+
 struct kvm_vfio_group {
        struct list_head node;
        struct vfio_group *vfio_group;
@@ -89,6 +93,47 @@ static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group)
        return ret > 0;
 }
 
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+static int kvm_vfio_external_user_iommu_id(struct vfio_group *vfio_group)
+{
+       int (*fn)(struct vfio_group *);
+       int ret = -EINVAL;
+
+       fn = symbol_get(vfio_external_user_iommu_id);
+       if (!fn)
+               return ret;
+
+       ret = fn(vfio_group);
+
+       symbol_put(vfio_external_user_iommu_id);
+
+       return ret;
+}
+
+static struct iommu_group *kvm_vfio_group_get_iommu_group(
+               struct vfio_group *group)
+{
+       int group_id = kvm_vfio_external_user_iommu_id(group);
+
+       if (group_id < 0)
+               return NULL;
+
+       return iommu_group_get_by_id(group_id);
+}
+
+static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm,
+               struct vfio_group *vfio_group)
+{
+       struct iommu_group *grp = kvm_vfio_group_get_iommu_group(vfio_group);
+
+       if (WARN_ON_ONCE(!grp))
+               return;
+
+       kvm_spapr_tce_release_iommu_group(kvm, grp);
+       iommu_group_put(grp);
+}
+#endif
+
 /*
  * Groups can use the same or different IOMMU domains.  If the same then
  * adding a new group may change the coherency of groups we've previously
@@ -211,6 +256,9 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 
                mutex_unlock(&kv->lock);
 
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+               kvm_spapr_tce_release_vfio_group(dev->kvm, vfio_group);
+#endif
                kvm_vfio_group_set_kvm(vfio_group, NULL);
 
                kvm_vfio_group_put_external_user(vfio_group);
@@ -218,6 +266,57 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
                kvm_vfio_update_coherency(dev);
 
                return ret;
+
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+       case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: {
+               struct kvm_vfio_spapr_tce param;
+               struct kvm_vfio *kv = dev->private;
+               struct vfio_group *vfio_group;
+               struct kvm_vfio_group *kvg;
+               struct fd f;
+               struct iommu_group *grp;
+
+               if (copy_from_user(&param, (void __user *)arg,
+                               sizeof(struct kvm_vfio_spapr_tce)))
+                       return -EFAULT;
+
+               f = fdget(param.groupfd);
+               if (!f.file)
+                       return -EBADF;
+
+               vfio_group = kvm_vfio_group_get_external_user(f.file);
+               fdput(f);
+
+               if (IS_ERR(vfio_group))
+                       return PTR_ERR(vfio_group);
+
+               grp = kvm_vfio_group_get_iommu_group(vfio_group);
+               if (WARN_ON_ONCE(!grp)) {
+                       kvm_vfio_group_put_external_user(vfio_group);
+                       return -EIO;
+               }
+
+               ret = -ENOENT;
+
+               mutex_lock(&kv->lock);
+
+               list_for_each_entry(kvg, &kv->group_list, node) {
+                       if (kvg->vfio_group != vfio_group)
+                               continue;
+
+                       ret = kvm_spapr_tce_attach_iommu_group(dev->kvm,
+                                       param.tablefd, grp);
+                       break;
+               }
+
+               mutex_unlock(&kv->lock);
+
+               iommu_group_put(grp);
+               kvm_vfio_group_put_external_user(vfio_group);
+
+               return ret;
+       }
+#endif /* CONFIG_SPAPR_TCE_IOMMU */
        }
 
        return -ENXIO;
@@ -242,6 +341,9 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
                switch (attr->attr) {
                case KVM_DEV_VFIO_GROUP_ADD:
                case KVM_DEV_VFIO_GROUP_DEL:
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+               case KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE:
+#endif
                        return 0;
                }
 
@@ -257,6 +359,9 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
        struct kvm_vfio_group *kvg, *tmp;
 
        list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+               kvm_spapr_tce_release_vfio_group(dev->kvm, kvg->vfio_group);
+#endif
                kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
                kvm_vfio_group_put_external_user(kvg->vfio_group);
                list_del(&kvg->node);