2 * hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008, 2009
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
17 #include <linux/compiler.h>
18 #include <linux/err.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <asm/asm-offsets.h>
32 #include <asm/lowcore.h>
34 #include <asm/pgtable.h>
37 #include <asm/switch_to.h>
40 #include <asm/cpacf.h>
41 #include <asm/timex.h>
45 #define KMSG_COMPONENT "kvm-s390"
47 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49 #define CREATE_TRACE_POINTS
51 #include "trace-s390.h"
53 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
56 (KVM_MAX_VCPUS + LOCAL_IRQS))
58 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61 { "userspace_handled", VCPU_STAT(exit_userspace) },
62 { "exit_null", VCPU_STAT(exit_null) },
63 { "exit_validity", VCPU_STAT(exit_validity) },
64 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
65 { "exit_external_request", VCPU_STAT(exit_external_request) },
66 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
67 { "exit_instruction", VCPU_STAT(exit_instruction) },
68 { "exit_pei", VCPU_STAT(exit_pei) },
69 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
70 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
71 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
72 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
73 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
74 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
75 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
76 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
77 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
78 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
79 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
80 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
81 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
82 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
83 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
84 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
85 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
86 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
87 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
88 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
89 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
90 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
91 { "instruction_spx", VCPU_STAT(instruction_spx) },
92 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
93 { "instruction_stap", VCPU_STAT(instruction_stap) },
94 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
95 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
96 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
97 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
98 { "instruction_essa", VCPU_STAT(instruction_essa) },
99 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
100 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
101 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
102 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
103 { "instruction_sie", VCPU_STAT(instruction_sie) },
104 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
105 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
106 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
107 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
108 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
109 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
110 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
111 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
112 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
113 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
114 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
115 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
116 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
117 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
118 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
119 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
120 { "diagnose_10", VCPU_STAT(diagnose_10) },
121 { "diagnose_44", VCPU_STAT(diagnose_44) },
122 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
123 { "diagnose_258", VCPU_STAT(diagnose_258) },
124 { "diagnose_308", VCPU_STAT(diagnose_308) },
125 { "diagnose_500", VCPU_STAT(diagnose_500) },
129 /* allow nested virtualization in KVM (if enabled by user space) */
131 module_param(nested, int, S_IRUGO);
132 MODULE_PARM_DESC(nested, "Nested virtualization support");
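/*
 * "nested" is declared read-only at runtime (S_IRUGO), so it has to be
 * given at load time, e.g. "modprobe kvm nested=1"; the current value
 * can be inspected via /sys/module/kvm/parameters/nested.
 */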
134 /* upper facilities limit for kvm */
135 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
137 unsigned long kvm_s390_fac_list_mask_size(void)
139 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
140 return ARRAY_SIZE(kvm_s390_fac_list_mask);
143 /* available cpu features supported by kvm */
144 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
145 /* available subfunctions indicated via query / "test bit" */
146 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
148 static struct gmap_notifier gmap_notifier;
149 static struct gmap_notifier vsie_gmap_notifier;
150 debug_info_t *kvm_s390_dbf;
152 /* Section: not file related */
153 int kvm_arch_hardware_enable(void)
155 /* every s390 is virtualization enabled ;-) */
159 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
163 * This callback is executed during stop_machine(). All CPUs are therefore
164 * temporarily stopped. In order not to change guest behavior, we have to
165 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
166 * so a CPU won't be stopped while calculating with the epoch.
168 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
172 struct kvm_vcpu *vcpu;
174 unsigned long long *delta = v;
176 list_for_each_entry(kvm, &vm_list, vm_list) {
177 kvm->arch.epoch -= *delta;
178 kvm_for_each_vcpu(i, vcpu, kvm) {
179 vcpu->arch.sie_block->epoch -= *delta;
180 if (vcpu->arch.cputm_enabled)
181 vcpu->arch.cputm_start += *delta;
182 if (vcpu->arch.vsie_block)
183 vcpu->arch.vsie_block->epoch -= *delta;
189 static struct notifier_block kvm_clock_notifier = {
190 .notifier_call = kvm_clock_sync,
193 int kvm_arch_hardware_setup(void)
195 gmap_notifier.notifier_call = kvm_gmap_notifier;
196 gmap_register_pte_notifier(&gmap_notifier);
197 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
198 gmap_register_pte_notifier(&vsie_gmap_notifier);
199 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
200 &kvm_clock_notifier);
204 void kvm_arch_hardware_unsetup(void)
206 gmap_unregister_pte_notifier(&gmap_notifier);
207 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
208 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
209 &kvm_clock_notifier);
212 static void allow_cpu_feat(unsigned long nr)
214 set_bit_inv(nr, kvm_s390_available_cpu_feat);
217 static inline int plo_test_bit(unsigned char nr)
219 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
220 int cc = 3; /* subfunction not available */
223 /* Parameter registers are ignored for "test bit" */
233 static void kvm_s390_cpu_feat_init(void)
237 for (i = 0; i < 256; ++i) {
239 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
242 if (test_facility(28)) /* TOD-clock steering */
243 ptff(kvm_s390_available_subfunc.ptff,
244 sizeof(kvm_s390_available_subfunc.ptff),
247 if (test_facility(17)) { /* MSA */
248 __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
249 __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
250 __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
251 __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
252 __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
254 if (test_facility(76)) /* MSA3 */
255 __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
256 if (test_facility(77)) { /* MSA4 */
257 __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
258 __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
259 __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
260 __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
262 if (test_facility(57)) /* MSA5 */
263 __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
265 if (MACHINE_HAS_ESOP)
266 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
268 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
269 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
271 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
272 !test_facility(3) || !nested)
274 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
275 if (sclp.has_64bscao)
276 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
278 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
280 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
282 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
284 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
286 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
288 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
290 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
291 * all skey handling functions read/set the skey from the PGSTE
292 * instead of the real storage key.
294 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
295 * pages to be detected as preserved although they are resident.
297 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
298 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
300 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
301 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
302 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
304 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
305 * cannot easily shadow the SCA because of the ipte lock.
309 int kvm_arch_init(void *opaque)
311 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
315 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
316 debug_unregister(kvm_s390_dbf);
320 kvm_s390_cpu_feat_init();
322 /* Register floating interrupt controller interface. */
323 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
326 void kvm_arch_exit(void)
328 debug_unregister(kvm_s390_dbf);
331 /* Section: device related */
332 long kvm_arch_dev_ioctl(struct file *filp,
333 unsigned int ioctl, unsigned long arg)
335 if (ioctl == KVM_S390_ENABLE_SIE)
336 return s390_enable_sie();
340 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
345 case KVM_CAP_S390_PSW:
346 case KVM_CAP_S390_GMAP:
347 case KVM_CAP_SYNC_MMU:
348 #ifdef CONFIG_KVM_S390_UCONTROL
349 case KVM_CAP_S390_UCONTROL:
351 case KVM_CAP_ASYNC_PF:
352 case KVM_CAP_SYNC_REGS:
353 case KVM_CAP_ONE_REG:
354 case KVM_CAP_ENABLE_CAP:
355 case KVM_CAP_S390_CSS_SUPPORT:
356 case KVM_CAP_IOEVENTFD:
357 case KVM_CAP_DEVICE_CTRL:
358 case KVM_CAP_ENABLE_CAP_VM:
359 case KVM_CAP_S390_IRQCHIP:
360 case KVM_CAP_VM_ATTRIBUTES:
361 case KVM_CAP_MP_STATE:
362 case KVM_CAP_S390_INJECT_IRQ:
363 case KVM_CAP_S390_USER_SIGP:
364 case KVM_CAP_S390_USER_STSI:
365 case KVM_CAP_S390_SKEYS:
366 case KVM_CAP_S390_IRQ_STATE:
367 case KVM_CAP_S390_USER_INSTR0:
370 case KVM_CAP_S390_MEM_OP:
373 case KVM_CAP_NR_VCPUS:
374 case KVM_CAP_MAX_VCPUS:
375 r = KVM_S390_BSCA_CPU_SLOTS;
376 if (!kvm_s390_use_sca_entries())
378 else if (sclp.has_esca && sclp.has_64bscao)
379 r = KVM_S390_ESCA_CPU_SLOTS;
381 case KVM_CAP_NR_MEMSLOTS:
382 r = KVM_USER_MEM_SLOTS;
384 case KVM_CAP_S390_COW:
385 r = MACHINE_HAS_ESOP;
387 case KVM_CAP_S390_VECTOR_REGISTERS:
390 case KVM_CAP_S390_RI:
391 r = test_facility(64);
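/*
 * A minimal user-space sketch of probing one of the capabilities
 * handled above; "vm_fd" (an open VM file descriptor) is an assumption:
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int max_vcpus = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 *	if (max_vcpus < 0)
 *		perror("KVM_CHECK_EXTENSION");
 */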
399 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
400 struct kvm_memory_slot *memslot)
402 gfn_t cur_gfn, last_gfn;
403 unsigned long address;
404 struct gmap *gmap = kvm->arch.gmap;
406 /* Loop over all guest pages */
407 last_gfn = memslot->base_gfn + memslot->npages;
408 for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
409 address = gfn_to_hva_memslot(memslot, cur_gfn);
411 if (test_and_clear_guest_dirty(gmap->mm, address))
412 mark_page_dirty(kvm, cur_gfn);
413 if (fatal_signal_pending(current))
419 /* Section: vm related */
420 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
423 * Get (and clear) the dirty memory log for a memory slot.
425 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
426 struct kvm_dirty_log *log)
430 struct kvm_memslots *slots;
431 struct kvm_memory_slot *memslot;
434 mutex_lock(&kvm->slots_lock);
437 if (log->slot >= KVM_USER_MEM_SLOTS)
440 slots = kvm_memslots(kvm);
441 memslot = id_to_memslot(slots, log->slot);
443 if (!memslot->dirty_bitmap)
446 kvm_s390_sync_dirty_log(kvm, memslot);
447 r = kvm_get_dirty_log(kvm, log, &is_dirty);
451 /* Clear the dirty log */
453 n = kvm_dirty_bitmap_bytes(memslot);
454 memset(memslot->dirty_bitmap, 0, n);
458 mutex_unlock(&kvm->slots_lock);
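/*
 * Rough user-space counterpart (a sketch; "vm_fd" and the bitmap sizing
 * are assumptions, one bit per page in the slot):
 *
 *	struct kvm_dirty_log log = {
 *		.slot = 0,
 *		.dirty_bitmap = bitmap,
 *	};
 *	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0)
 *		perror("KVM_GET_DIRTY_LOG");
 */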
462 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
465 struct kvm_vcpu *vcpu;
467 kvm_for_each_vcpu(i, vcpu, kvm) {
468 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
472 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
480 case KVM_CAP_S390_IRQCHIP:
481 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
482 kvm->arch.use_irqchip = 1;
485 case KVM_CAP_S390_USER_SIGP:
486 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
487 kvm->arch.user_sigp = 1;
490 case KVM_CAP_S390_VECTOR_REGISTERS:
491 mutex_lock(&kvm->lock);
492 if (kvm->created_vcpus) {
494 } else if (MACHINE_HAS_VX) {
495 set_kvm_facility(kvm->arch.model.fac_mask, 129);
496 set_kvm_facility(kvm->arch.model.fac_list, 129);
500 mutex_unlock(&kvm->lock);
501 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
502 r ? "(not available)" : "(success)");
504 case KVM_CAP_S390_RI:
506 mutex_lock(&kvm->lock);
507 if (kvm->created_vcpus) {
509 } else if (test_facility(64)) {
510 set_kvm_facility(kvm->arch.model.fac_mask, 64);
511 set_kvm_facility(kvm->arch.model.fac_list, 64);
514 mutex_unlock(&kvm->lock);
515 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
516 r ? "(not available)" : "(success)");
518 case KVM_CAP_S390_USER_STSI:
519 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
520 kvm->arch.user_stsi = 1;
523 case KVM_CAP_S390_USER_INSTR0:
524 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
525 kvm->arch.user_instr0 = 1;
526 icpt_operexc_on_all_vcpus(kvm);
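/*
 * Sketch of how user space enables one of the capabilities above
 * ("vm_fd" is an assumed open VM file descriptor):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
 *		perror("KVM_ENABLE_CAP");
 */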
536 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
540 switch (attr->attr) {
541 case KVM_S390_VM_MEM_LIMIT_SIZE:
543 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
544 kvm->arch.mem_limit);
545 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
555 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
559 switch (attr->attr) {
560 case KVM_S390_VM_MEM_ENABLE_CMMA:
566 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
567 mutex_lock(&kvm->lock);
568 if (!kvm->created_vcpus) {
569 kvm->arch.use_cmma = 1;
572 mutex_unlock(&kvm->lock);
574 case KVM_S390_VM_MEM_CLR_CMMA:
579 if (!kvm->arch.use_cmma)
582 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
583 mutex_lock(&kvm->lock);
584 idx = srcu_read_lock(&kvm->srcu);
585 s390_reset_cmma(kvm->arch.gmap->mm);
586 srcu_read_unlock(&kvm->srcu, idx);
587 mutex_unlock(&kvm->lock);
590 case KVM_S390_VM_MEM_LIMIT_SIZE: {
591 unsigned long new_limit;
593 if (kvm_is_ucontrol(kvm))
596 if (get_user(new_limit, (u64 __user *)attr->addr))
599 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
600 new_limit > kvm->arch.mem_limit)
606 /* gmap_create takes last usable address */
607 if (new_limit != KVM_S390_NO_MEM_LIMIT)
611 mutex_lock(&kvm->lock);
612 if (!kvm->created_vcpus) {
613 /* gmap_create will round the limit up */
614 struct gmap *new = gmap_create(current->mm, new_limit);
619 gmap_remove(kvm->arch.gmap);
621 kvm->arch.gmap = new;
625 mutex_unlock(&kvm->lock);
626 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
627 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
628 (void *) kvm->arch.gmap->asce);
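/*
 * These memory attributes are driven through the generic device-attribute
 * ioctl. A sketch that caps guest memory at 2 GiB ("vm_fd" is an
 * assumption):
 *
 *	__u64 limit = 1ULL << 31;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)&limit,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */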
638 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
640 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
642 struct kvm_vcpu *vcpu;
645 if (!test_kvm_facility(kvm, 76))
648 mutex_lock(&kvm->lock);
649 switch (attr->attr) {
650 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
652 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
653 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
654 kvm->arch.crypto.aes_kw = 1;
655 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
657 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
659 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
660 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
661 kvm->arch.crypto.dea_kw = 1;
662 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
664 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
665 kvm->arch.crypto.aes_kw = 0;
666 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
667 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
668 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
670 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
671 kvm->arch.crypto.dea_kw = 0;
672 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
673 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
674 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
677 mutex_unlock(&kvm->lock);
681 kvm_for_each_vcpu(i, vcpu, kvm) {
682 kvm_s390_vcpu_crypto_setup(vcpu);
685 mutex_unlock(&kvm->lock);
689 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
693 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
699 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
704 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
708 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
711 kvm_s390_set_tod_clock(kvm, gtod);
712 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
716 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
723 switch (attr->attr) {
724 case KVM_S390_VM_TOD_HIGH:
725 ret = kvm_s390_set_tod_high(kvm, attr);
727 case KVM_S390_VM_TOD_LOW:
728 ret = kvm_s390_set_tod_low(kvm, attr);
737 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
741 if (copy_to_user((void __user *)attr->addr, &gtod_high,
744 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
749 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
753 gtod = kvm_s390_get_tod_clock_fast(kvm);
754 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
756 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
761 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
768 switch (attr->attr) {
769 case KVM_S390_VM_TOD_HIGH:
770 ret = kvm_s390_get_tod_high(kvm, attr);
772 case KVM_S390_VM_TOD_LOW:
773 ret = kvm_s390_get_tod_low(kvm, attr);
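/*
 * Reading the guest TOD from user space follows the same device-attribute
 * pattern (sketch; "vm_fd" is an assumption):
 *
 *	__u64 tod;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)&tod,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */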
782 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
784 struct kvm_s390_vm_cpu_processor *proc;
785 u16 lowest_ibc, unblocked_ibc;
788 mutex_lock(&kvm->lock);
789 if (kvm->created_vcpus) {
793 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
798 if (!copy_from_user(proc, (void __user *)attr->addr,
800 kvm->arch.model.cpuid = proc->cpuid;
801 lowest_ibc = sclp.ibc >> 16 & 0xfff;
802 unblocked_ibc = sclp.ibc & 0xfff;
803 if (lowest_ibc && proc->ibc) {
804 if (proc->ibc > unblocked_ibc)
805 kvm->arch.model.ibc = unblocked_ibc;
806 else if (proc->ibc < lowest_ibc)
807 kvm->arch.model.ibc = lowest_ibc;
809 kvm->arch.model.ibc = proc->ibc;
811 memcpy(kvm->arch.model.fac_list, proc->fac_list,
812 S390_ARCH_FAC_LIST_SIZE_BYTE);
817 mutex_unlock(&kvm->lock);
821 static int kvm_s390_set_processor_feat(struct kvm *kvm,
822 struct kvm_device_attr *attr)
824 struct kvm_s390_vm_cpu_feat data;
827 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
829 if (!bitmap_subset((unsigned long *) data.feat,
830 kvm_s390_available_cpu_feat,
831 KVM_S390_VM_CPU_FEAT_NR_BITS))
834 mutex_lock(&kvm->lock);
835 if (!atomic_read(&kvm->online_vcpus)) {
836 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
837 KVM_S390_VM_CPU_FEAT_NR_BITS);
840 mutex_unlock(&kvm->lock);
844 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
845 struct kvm_device_attr *attr)
848 * Once supported by kernel + hw, we have to store the subfunctions
849 * in kvm->arch and remember that user space configured them.
854 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
858 switch (attr->attr) {
859 case KVM_S390_VM_CPU_PROCESSOR:
860 ret = kvm_s390_set_processor(kvm, attr);
862 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
863 ret = kvm_s390_set_processor_feat(kvm, attr);
865 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
866 ret = kvm_s390_set_processor_subfunc(kvm, attr);
872 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
874 struct kvm_s390_vm_cpu_processor *proc;
877 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
882 proc->cpuid = kvm->arch.model.cpuid;
883 proc->ibc = kvm->arch.model.ibc;
884 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
885 S390_ARCH_FAC_LIST_SIZE_BYTE);
886 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
893 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
895 struct kvm_s390_vm_cpu_machine *mach;
898 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
903 get_cpu_id((struct cpuid *) &mach->cpuid);
904 mach->ibc = sclp.ibc;
905 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
906 S390_ARCH_FAC_LIST_SIZE_BYTE);
907 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
908 S390_ARCH_FAC_LIST_SIZE_BYTE);
909 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
916 static int kvm_s390_get_processor_feat(struct kvm *kvm,
917 struct kvm_device_attr *attr)
919 struct kvm_s390_vm_cpu_feat data;
921 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
922 KVM_S390_VM_CPU_FEAT_NR_BITS);
923 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
928 static int kvm_s390_get_machine_feat(struct kvm *kvm,
929 struct kvm_device_attr *attr)
931 struct kvm_s390_vm_cpu_feat data;
933 bitmap_copy((unsigned long *) data.feat,
934 kvm_s390_available_cpu_feat,
935 KVM_S390_VM_CPU_FEAT_NR_BITS);
936 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
941 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
942 struct kvm_device_attr *attr)
945 * Once we can actually configure subfunctions (kernel + hw support),
946 * we have to check if they were already set by user space, if so copy
947 * them from kvm->arch.
952 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
953 struct kvm_device_attr *attr)
955 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
956 sizeof(struct kvm_s390_vm_cpu_subfunc)))
960 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
964 switch (attr->attr) {
965 case KVM_S390_VM_CPU_PROCESSOR:
966 ret = kvm_s390_get_processor(kvm, attr);
968 case KVM_S390_VM_CPU_MACHINE:
969 ret = kvm_s390_get_machine(kvm, attr);
971 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
972 ret = kvm_s390_get_processor_feat(kvm, attr);
974 case KVM_S390_VM_CPU_MACHINE_FEAT:
975 ret = kvm_s390_get_machine_feat(kvm, attr);
977 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
978 ret = kvm_s390_get_processor_subfunc(kvm, attr);
980 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
981 ret = kvm_s390_get_machine_subfunc(kvm, attr);
987 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
991 switch (attr->group) {
992 case KVM_S390_VM_MEM_CTRL:
993 ret = kvm_s390_set_mem_control(kvm, attr);
995 case KVM_S390_VM_TOD:
996 ret = kvm_s390_set_tod(kvm, attr);
998 case KVM_S390_VM_CPU_MODEL:
999 ret = kvm_s390_set_cpu_model(kvm, attr);
1001 case KVM_S390_VM_CRYPTO:
1002 ret = kvm_s390_vm_set_crypto(kvm, attr);
1012 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1016 switch (attr->group) {
1017 case KVM_S390_VM_MEM_CTRL:
1018 ret = kvm_s390_get_mem_control(kvm, attr);
1020 case KVM_S390_VM_TOD:
1021 ret = kvm_s390_get_tod(kvm, attr);
1023 case KVM_S390_VM_CPU_MODEL:
1024 ret = kvm_s390_get_cpu_model(kvm, attr);
1034 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1038 switch (attr->group) {
1039 case KVM_S390_VM_MEM_CTRL:
1040 switch (attr->attr) {
1041 case KVM_S390_VM_MEM_ENABLE_CMMA:
1042 case KVM_S390_VM_MEM_CLR_CMMA:
1043 ret = sclp.has_cmma ? 0 : -ENXIO;
1045 case KVM_S390_VM_MEM_LIMIT_SIZE:
1053 case KVM_S390_VM_TOD:
1054 switch (attr->attr) {
1055 case KVM_S390_VM_TOD_LOW:
1056 case KVM_S390_VM_TOD_HIGH:
1064 case KVM_S390_VM_CPU_MODEL:
1065 switch (attr->attr) {
1066 case KVM_S390_VM_CPU_PROCESSOR:
1067 case KVM_S390_VM_CPU_MACHINE:
1068 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1069 case KVM_S390_VM_CPU_MACHINE_FEAT:
1070 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1073 /* configuring subfunctions is not supported yet */
1074 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1080 case KVM_S390_VM_CRYPTO:
1081 switch (attr->attr) {
1082 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1083 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1084 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1085 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1101 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1107 if (args->flags != 0)
1110 /* Is this guest using storage keys? */
1111 if (!mm_use_skey(current->mm))
1112 return KVM_S390_GET_SKEYS_NONE;
1114 /* Enforce sane limit on memory allocation */
1115 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1118 keys = kmalloc_array(args->count, sizeof(uint8_t),
1119 GFP_KERNEL | __GFP_NOWARN);
1121 keys = vmalloc(sizeof(uint8_t) * args->count);
1125 down_read(&current->mm->mmap_sem);
1126 for (i = 0; i < args->count; i++) {
1127 hva = gfn_to_hva(kvm, args->start_gfn + i);
1128 if (kvm_is_error_hva(hva)) {
1133 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1137 up_read(&current->mm->mmap_sem);
1140 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1141 sizeof(uint8_t) * args->count);
1150 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1156 if (args->flags != 0)
1159 /* Enforce sane limit on memory allocation */
1160 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1163 keys = kmalloc_array(args->count, sizeof(uint8_t),
1164 GFP_KERNEL | __GFP_NOWARN);
1166 keys = vmalloc(sizeof(uint8_t) * args->count);
1170 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1171 sizeof(uint8_t) * args->count);
1177 /* Enable storage key handling for the guest */
1178 r = s390_enable_skey();
1182 down_read(&current->mm->mmap_sem);
1183 for (i = 0; i < args->count; i++) {
1184 hva = gfn_to_hva(kvm, args->start_gfn + i);
1185 if (kvm_is_error_hva(hva)) {
1190 /* Lowest order bit is reserved */
1191 if (keys[i] & 0x01) {
1196 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1200 up_read(&current->mm->mmap_sem);
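/*
 * Matching user-space call for the two skeys ioctls above (sketch;
 * "vm_fd" is an assumption; skeydata_addr points at count bytes, one
 * storage key per guest page):
 *
 *	__u8 skeys[256];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 256,
 *		.skeydata_addr = (__u64)skeys,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */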
1206 long kvm_arch_vm_ioctl(struct file *filp,
1207 unsigned int ioctl, unsigned long arg)
1209 struct kvm *kvm = filp->private_data;
1210 void __user *argp = (void __user *)arg;
1211 struct kvm_device_attr attr;
1215 case KVM_S390_INTERRUPT: {
1216 struct kvm_s390_interrupt s390int;
1219 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1221 r = kvm_s390_inject_vm(kvm, &s390int);
1224 case KVM_ENABLE_CAP: {
1225 struct kvm_enable_cap cap;
1227 if (copy_from_user(&cap, argp, sizeof(cap)))
1229 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1232 case KVM_CREATE_IRQCHIP: {
1233 struct kvm_irq_routing_entry routing;
1236 if (kvm->arch.use_irqchip) {
1237 /* Set up dummy routing. */
1238 memset(&routing, 0, sizeof(routing));
1239 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1243 case KVM_SET_DEVICE_ATTR: {
1245 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1247 r = kvm_s390_vm_set_attr(kvm, &attr);
1250 case KVM_GET_DEVICE_ATTR: {
1252 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1254 r = kvm_s390_vm_get_attr(kvm, &attr);
1257 case KVM_HAS_DEVICE_ATTR: {
1259 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1261 r = kvm_s390_vm_has_attr(kvm, &attr);
1264 case KVM_S390_GET_SKEYS: {
1265 struct kvm_s390_skeys args;
1268 if (copy_from_user(&args, argp,
1269 sizeof(struct kvm_s390_skeys)))
1271 r = kvm_s390_get_skeys(kvm, &args);
1274 case KVM_S390_SET_SKEYS: {
1275 struct kvm_s390_skeys args;
1278 if (copy_from_user(&args, argp,
1279 sizeof(struct kvm_s390_skeys)))
1281 r = kvm_s390_set_skeys(kvm, &args);
1291 static int kvm_s390_query_ap_config(u8 *config)
1293 u32 fcn_code = 0x04000000UL;
1296 memset(config, 0, 128);
1300 ".long 0xb2af0000\n" /* PQAP(QCI) */
1306 : "r" (fcn_code), "r" (config)
1307 : "cc", "0", "2", "memory"
1313 static int kvm_s390_apxa_installed(void)
1318 if (test_facility(12)) {
1319 cc = kvm_s390_query_ap_config(config);
1322 pr_err("PQAP(QCI) failed with cc=%d", cc);
1324 return config[0] & 0x40;
1330 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1332 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1334 if (kvm_s390_apxa_installed())
1335 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1337 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1340 static u64 kvm_s390_get_initial_cpuid(void)
1345 cpuid.version = 0xff;
1346 return *((u64 *) &cpuid);
1349 static void kvm_s390_crypto_init(struct kvm *kvm)
1351 if (!test_kvm_facility(kvm, 76))
1354 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1355 kvm_s390_set_crycb_format(kvm);
1357 /* Enable AES/DEA protected key functions by default */
1358 kvm->arch.crypto.aes_kw = 1;
1359 kvm->arch.crypto.dea_kw = 1;
1360 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1361 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1362 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1363 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1366 static void sca_dispose(struct kvm *kvm)
1368 if (kvm->arch.use_esca)
1369 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1371 free_page((unsigned long)(kvm->arch.sca));
1372 kvm->arch.sca = NULL;
1375 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1377 gfp_t alloc_flags = GFP_KERNEL;
1379 char debug_name[16];
1380 static unsigned long sca_offset;
1383 #ifdef CONFIG_KVM_S390_UCONTROL
1384 if (type & ~KVM_VM_S390_UCONTROL)
1386 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1393 rc = s390_enable_sie();
1399 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1401 kvm->arch.use_esca = 0; /* start with basic SCA */
1402 if (!sclp.has_64bscao)
1403 alloc_flags |= GFP_DMA;
1404 rwlock_init(&kvm->arch.sca_lock);
1405 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1408 spin_lock(&kvm_lock);
1410 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1412 kvm->arch.sca = (struct bsca_block *)
1413 ((char *) kvm->arch.sca + sca_offset);
1414 spin_unlock(&kvm_lock);
1416 sprintf(debug_name, "kvm-%u", current->pid);
1418 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1422 kvm->arch.sie_page2 =
1423 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1424 if (!kvm->arch.sie_page2)
1427 /* Populate the facility mask initially. */
1428 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1429 S390_ARCH_FAC_LIST_SIZE_BYTE);
1430 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1431 if (i < kvm_s390_fac_list_mask_size())
1432 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1434 kvm->arch.model.fac_mask[i] = 0UL;
1437 /* Populate the facility list initially. */
1438 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1439 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1440 S390_ARCH_FAC_LIST_SIZE_BYTE);
1442 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1443 set_kvm_facility(kvm->arch.model.fac_list, 74);
1445 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1446 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1448 kvm_s390_crypto_init(kvm);
1450 spin_lock_init(&kvm->arch.float_int.lock);
1451 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1452 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1453 init_waitqueue_head(&kvm->arch.ipte_wq);
1454 mutex_init(&kvm->arch.ipte_mutex);
1456 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1457 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1459 if (type & KVM_VM_S390_UCONTROL) {
1460 kvm->arch.gmap = NULL;
1461 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1463 if (sclp.hamax == U64_MAX)
1464 kvm->arch.mem_limit = TASK_MAX_SIZE;
1466 kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1468 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1469 if (!kvm->arch.gmap)
1471 kvm->arch.gmap->private = kvm;
1472 kvm->arch.gmap->pfault_enabled = 0;
1475 kvm->arch.css_support = 0;
1476 kvm->arch.use_irqchip = 0;
1477 kvm->arch.epoch = 0;
1479 spin_lock_init(&kvm->arch.start_stop_lock);
1480 kvm_s390_vsie_init(kvm);
1481 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1485 free_page((unsigned long)kvm->arch.sie_page2);
1486 debug_unregister(kvm->arch.dbf);
1488 KVM_EVENT(3, "creation of vm failed: %d", rc);
1492 bool kvm_arch_has_vcpu_debugfs(void)
1497 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1502 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1504 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1505 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1506 kvm_s390_clear_local_irqs(vcpu);
1507 kvm_clear_async_pf_completion_queue(vcpu);
1508 if (!kvm_is_ucontrol(vcpu->kvm))
1511 if (kvm_is_ucontrol(vcpu->kvm))
1512 gmap_remove(vcpu->arch.gmap);
1514 if (vcpu->kvm->arch.use_cmma)
1515 kvm_s390_vcpu_unsetup_cmma(vcpu);
1516 free_page((unsigned long)(vcpu->arch.sie_block));
1518 kvm_vcpu_uninit(vcpu);
1519 kmem_cache_free(kvm_vcpu_cache, vcpu);
1522 static void kvm_free_vcpus(struct kvm *kvm)
1525 struct kvm_vcpu *vcpu;
1527 kvm_for_each_vcpu(i, vcpu, kvm)
1528 kvm_arch_vcpu_destroy(vcpu);
1530 mutex_lock(&kvm->lock);
1531 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1532 kvm->vcpus[i] = NULL;
1534 atomic_set(&kvm->online_vcpus, 0);
1535 mutex_unlock(&kvm->lock);
1538 void kvm_arch_destroy_vm(struct kvm *kvm)
1540 kvm_free_vcpus(kvm);
1542 debug_unregister(kvm->arch.dbf);
1543 free_page((unsigned long)kvm->arch.sie_page2);
1544 if (!kvm_is_ucontrol(kvm))
1545 gmap_remove(kvm->arch.gmap);
1546 kvm_s390_destroy_adapters(kvm);
1547 kvm_s390_clear_float_irqs(kvm);
1548 kvm_s390_vsie_destroy(kvm);
1549 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1552 /* Section: vcpu related */
1553 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1555 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1556 if (!vcpu->arch.gmap)
1558 vcpu->arch.gmap->private = vcpu->kvm;
1563 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1565 if (!kvm_s390_use_sca_entries())
1567 read_lock(&vcpu->kvm->arch.sca_lock);
1568 if (vcpu->kvm->arch.use_esca) {
1569 struct esca_block *sca = vcpu->kvm->arch.sca;
1571 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1572 sca->cpu[vcpu->vcpu_id].sda = 0;
1574 struct bsca_block *sca = vcpu->kvm->arch.sca;
1576 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1577 sca->cpu[vcpu->vcpu_id].sda = 0;
1579 read_unlock(&vcpu->kvm->arch.sca_lock);
1582 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1584 if (!kvm_s390_use_sca_entries()) {
1585 struct bsca_block *sca = vcpu->kvm->arch.sca;
1587 /* we still need the basic sca for the ipte control */
1588 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1589 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1591 read_lock(&vcpu->kvm->arch.sca_lock);
1592 if (vcpu->kvm->arch.use_esca) {
1593 struct esca_block *sca = vcpu->kvm->arch.sca;
1595 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1596 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1597 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1598 vcpu->arch.sie_block->ecb2 |= 0x04U;
1599 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1601 struct bsca_block *sca = vcpu->kvm->arch.sca;
1603 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1604 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1605 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1606 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1608 read_unlock(&vcpu->kvm->arch.sca_lock);
1611 /* Basic SCA to Extended SCA data copy routines */
1612 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1615 d->sigp_ctrl.c = s->sigp_ctrl.c;
1616 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1619 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1623 d->ipte_control = s->ipte_control;
1625 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1626 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1629 static int sca_switch_to_extended(struct kvm *kvm)
1631 struct bsca_block *old_sca = kvm->arch.sca;
1632 struct esca_block *new_sca;
1633 struct kvm_vcpu *vcpu;
1634 unsigned int vcpu_idx;
1637 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1641 scaoh = (u32)((u64)(new_sca) >> 32);
1642 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1644 kvm_s390_vcpu_block_all(kvm);
1645 write_lock(&kvm->arch.sca_lock);
1647 sca_copy_b_to_e(new_sca, old_sca);
1649 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1650 vcpu->arch.sie_block->scaoh = scaoh;
1651 vcpu->arch.sie_block->scaol = scaol;
1652 vcpu->arch.sie_block->ecb2 |= 0x04U;
1654 kvm->arch.sca = new_sca;
1655 kvm->arch.use_esca = 1;
1657 write_unlock(&kvm->arch.sca_lock);
1658 kvm_s390_vcpu_unblock_all(kvm);
1660 free_page((unsigned long)old_sca);
1662 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1663 old_sca, kvm->arch.sca);
1667 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1671 if (!kvm_s390_use_sca_entries()) {
1672 if (id < KVM_MAX_VCPUS)
1676 if (id < KVM_S390_BSCA_CPU_SLOTS)
1678 if (!sclp.has_esca || !sclp.has_64bscao)
1681 mutex_lock(&kvm->lock);
1682 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1683 mutex_unlock(&kvm->lock);
1685 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1688 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1690 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1691 kvm_clear_async_pf_completion_queue(vcpu);
1692 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1698 kvm_s390_set_prefix(vcpu, 0);
1699 if (test_kvm_facility(vcpu->kvm, 64))
1700 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1701 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1702 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1705 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1707 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1709 if (kvm_is_ucontrol(vcpu->kvm))
1710 return __kvm_ucontrol_vcpu_init(vcpu);
1715 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1716 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1718 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1719 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1720 vcpu->arch.cputm_start = get_tod_clock_fast();
1721 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1724 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1725 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1727 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1728 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1729 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1730 vcpu->arch.cputm_start = 0;
1731 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1734 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1735 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1737 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1738 vcpu->arch.cputm_enabled = true;
1739 __start_cpu_timer_accounting(vcpu);
1742 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1743 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1745 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1746 __stop_cpu_timer_accounting(vcpu);
1747 vcpu->arch.cputm_enabled = false;
1750 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1752 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1753 __enable_cpu_timer_accounting(vcpu);
1757 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1759 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1760 __disable_cpu_timer_accounting(vcpu);
1764 /* set the cpu timer - may only be called from the VCPU thread itself */
1765 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1767 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1768 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1769 if (vcpu->arch.cputm_enabled)
1770 vcpu->arch.cputm_start = get_tod_clock_fast();
1771 vcpu->arch.sie_block->cputm = cputm;
1772 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1776 /* update and get the cpu timer - can also be called from other VCPU threads */
1777 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1782 if (unlikely(!vcpu->arch.cputm_enabled))
1783 return vcpu->arch.sie_block->cputm;
1785 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1787 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1789 * If the writer would ever execute a read in the critical
1790 * section, e.g. in irq context, we have a deadlock.
1792 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1793 value = vcpu->arch.sie_block->cputm;
1794 /* if cputm_start is 0, accounting is being started/stopped */
1795 if (likely(vcpu->arch.cputm_start))
1796 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1797 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1802 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1804 /* Save host register state */
1806 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1807 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1810 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1812 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1813 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1814 if (test_fp_ctl(current->thread.fpu.fpc))
1815 /* User space provided an invalid FPC, let's clear it */
1816 current->thread.fpu.fpc = 0;
1818 save_access_regs(vcpu->arch.host_acrs);
1819 restore_access_regs(vcpu->run->s.regs.acrs);
1820 gmap_enable(vcpu->arch.enabled_gmap);
1821 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1822 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1823 __start_cpu_timer_accounting(vcpu);
1827 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1830 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1831 __stop_cpu_timer_accounting(vcpu);
1832 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1833 vcpu->arch.enabled_gmap = gmap_get_enabled();
1834 gmap_disable(vcpu->arch.enabled_gmap);
1836 /* Save guest register state */
1838 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1840 /* Restore host register state */
1841 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1842 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1844 save_access_regs(vcpu->run->s.regs.acrs);
1845 restore_access_regs(vcpu->arch.host_acrs);
1848 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1850 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1851 vcpu->arch.sie_block->gpsw.mask = 0UL;
1852 vcpu->arch.sie_block->gpsw.addr = 0UL;
1853 kvm_s390_set_prefix(vcpu, 0);
1854 kvm_s390_set_cpu_timer(vcpu, 0);
1855 vcpu->arch.sie_block->ckc = 0UL;
1856 vcpu->arch.sie_block->todpr = 0;
1857 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1858 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1859 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1860 /* make sure the new fpc will be lazily loaded */
1862 current->thread.fpu.fpc = 0;
1863 vcpu->arch.sie_block->gbea = 1;
1864 vcpu->arch.sie_block->pp = 0;
1865 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1866 kvm_clear_async_pf_completion_queue(vcpu);
1867 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1868 kvm_s390_vcpu_stop(vcpu);
1869 kvm_s390_clear_local_irqs(vcpu);
1872 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1874 mutex_lock(&vcpu->kvm->lock);
1876 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1878 mutex_unlock(&vcpu->kvm->lock);
1879 if (!kvm_is_ucontrol(vcpu->kvm)) {
1880 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1883 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1884 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1885 /* make vcpu_load load the right gmap on the first trigger */
1886 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1889 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1891 if (!test_kvm_facility(vcpu->kvm, 76))
1894 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1896 if (vcpu->kvm->arch.crypto.aes_kw)
1897 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1898 if (vcpu->kvm->arch.crypto.dea_kw)
1899 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1901 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1904 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1906 free_page(vcpu->arch.sie_block->cbrlo);
1907 vcpu->arch.sie_block->cbrlo = 0;
1910 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1912 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1913 if (!vcpu->arch.sie_block->cbrlo)
1916 vcpu->arch.sie_block->ecb2 |= 0x80;
1917 vcpu->arch.sie_block->ecb2 &= ~0x08;
1921 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1923 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1925 vcpu->arch.sie_block->ibc = model->ibc;
1926 if (test_kvm_facility(vcpu->kvm, 7))
1927 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1930 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1934 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1938 if (test_kvm_facility(vcpu->kvm, 78))
1939 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1940 else if (test_kvm_facility(vcpu->kvm, 8))
1941 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1943 kvm_s390_vcpu_setup_model(vcpu);
1945 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1946 if (MACHINE_HAS_ESOP)
1947 vcpu->arch.sie_block->ecb |= 0x02;
1948 if (test_kvm_facility(vcpu->kvm, 9))
1949 vcpu->arch.sie_block->ecb |= 0x04;
1950 if (test_kvm_facility(vcpu->kvm, 73))
1951 vcpu->arch.sie_block->ecb |= 0x10;
1953 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1954 vcpu->arch.sie_block->ecb2 |= 0x08;
1955 vcpu->arch.sie_block->eca = 0x1002000U;
1957 vcpu->arch.sie_block->eca |= 0x80000000U;
1959 vcpu->arch.sie_block->eca |= 0x40000000U;
1961 vcpu->arch.sie_block->eca |= 1;
1962 if (sclp.has_sigpif)
1963 vcpu->arch.sie_block->eca |= 0x10000000U;
1964 if (test_kvm_facility(vcpu->kvm, 129)) {
1965 vcpu->arch.sie_block->eca |= 0x00020000;
1966 vcpu->arch.sie_block->ecd |= 0x20000000;
1968 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1969 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1971 if (vcpu->kvm->arch.use_cmma) {
1972 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1976 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1977 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1979 kvm_s390_vcpu_crypto_setup(vcpu);
1984 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1987 struct kvm_vcpu *vcpu;
1988 struct sie_page *sie_page;
1991 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1996 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2000 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2004 vcpu->arch.sie_block = &sie_page->sie_block;
2005 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2007 /* the real guest size will always be smaller than msl */
2008 vcpu->arch.sie_block->mso = 0;
2009 vcpu->arch.sie_block->msl = sclp.hamax;
2011 vcpu->arch.sie_block->icpua = id;
2012 spin_lock_init(&vcpu->arch.local_int.lock);
2013 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2014 vcpu->arch.local_int.wq = &vcpu->wq;
2015 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2016 seqcount_init(&vcpu->arch.cputm_seqcount);
2018 rc = kvm_vcpu_init(vcpu, kvm, id);
2020 goto out_free_sie_block;
2021 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2022 vcpu->arch.sie_block);
2023 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2027 free_page((unsigned long)(vcpu->arch.sie_block));
2029 kmem_cache_free(kvm_vcpu_cache, vcpu);
2034 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2036 return kvm_s390_vcpu_has_irq(vcpu, 0);
2039 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2041 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2045 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2047 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2050 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2052 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2056 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2058 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2062 * Kick a guest cpu out of SIE and wait until SIE is not running.
2063 * If the CPU is not running (e.g. waiting as idle) the function will
2064 * return immediately. */
2065 void exit_sie(struct kvm_vcpu *vcpu)
2067 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2068 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2072 /* Kick a guest cpu out of SIE to process a request synchronously */
2073 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2075 kvm_make_request(req, vcpu);
2076 kvm_s390_vcpu_request(vcpu);
2079 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2082 struct kvm *kvm = gmap->private;
2083 struct kvm_vcpu *vcpu;
2084 unsigned long prefix;
2087 if (gmap_is_shadow(gmap))
2089 if (start >= 1UL << 31)
2090 /* We are only interested in prefix pages */
2092 kvm_for_each_vcpu(i, vcpu, kvm) {
2093 /* match against both prefix pages */
2094 prefix = kvm_s390_get_prefix(vcpu);
2095 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2096 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2098 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2103 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2105 /* kvm common code refers to this, but never calls it */
2110 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2111 struct kvm_one_reg *reg)
2116 case KVM_REG_S390_TODPR:
2117 r = put_user(vcpu->arch.sie_block->todpr,
2118 (u32 __user *)reg->addr);
2120 case KVM_REG_S390_EPOCHDIFF:
2121 r = put_user(vcpu->arch.sie_block->epoch,
2122 (u64 __user *)reg->addr);
2124 case KVM_REG_S390_CPU_TIMER:
2125 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2126 (u64 __user *)reg->addr);
2128 case KVM_REG_S390_CLOCK_COMP:
2129 r = put_user(vcpu->arch.sie_block->ckc,
2130 (u64 __user *)reg->addr);
2132 case KVM_REG_S390_PFTOKEN:
2133 r = put_user(vcpu->arch.pfault_token,
2134 (u64 __user *)reg->addr);
2136 case KVM_REG_S390_PFCOMPARE:
2137 r = put_user(vcpu->arch.pfault_compare,
2138 (u64 __user *)reg->addr);
2140 case KVM_REG_S390_PFSELECT:
2141 r = put_user(vcpu->arch.pfault_select,
2142 (u64 __user *)reg->addr);
2144 case KVM_REG_S390_PP:
2145 r = put_user(vcpu->arch.sie_block->pp,
2146 (u64 __user *)reg->addr);
2148 case KVM_REG_S390_GBEA:
2149 r = put_user(vcpu->arch.sie_block->gbea,
2150 (u64 __user *)reg->addr);
2159 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2160 struct kvm_one_reg *reg)
2166 case KVM_REG_S390_TODPR:
2167 r = get_user(vcpu->arch.sie_block->todpr,
2168 (u32 __user *)reg->addr);
2170 case KVM_REG_S390_EPOCHDIFF:
2171 r = get_user(vcpu->arch.sie_block->epoch,
2172 (u64 __user *)reg->addr);
2174 case KVM_REG_S390_CPU_TIMER:
2175 r = get_user(val, (u64 __user *)reg->addr);
2177 kvm_s390_set_cpu_timer(vcpu, val);
2179 case KVM_REG_S390_CLOCK_COMP:
2180 r = get_user(vcpu->arch.sie_block->ckc,
2181 (u64 __user *)reg->addr);
2183 case KVM_REG_S390_PFTOKEN:
2184 r = get_user(vcpu->arch.pfault_token,
2185 (u64 __user *)reg->addr);
2186 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2187 kvm_clear_async_pf_completion_queue(vcpu);
2189 case KVM_REG_S390_PFCOMPARE:
2190 r = get_user(vcpu->arch.pfault_compare,
2191 (u64 __user *)reg->addr);
2193 case KVM_REG_S390_PFSELECT:
2194 r = get_user(vcpu->arch.pfault_select,
2195 (u64 __user *)reg->addr);
2197 case KVM_REG_S390_PP:
2198 r = get_user(vcpu->arch.sie_block->pp,
2199 (u64 __user *)reg->addr);
2201 case KVM_REG_S390_GBEA:
2202 r = get_user(vcpu->arch.sie_block->gbea,
2203 (u64 __user *)reg->addr);
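/*
 * User space reads and writes these registers one at a time (sketch;
 * "vcpu_fd" is an assumption):
 *
 *	__u64 cputm = 0;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
 */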
2212 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2214 kvm_s390_vcpu_initial_reset(vcpu);
2218 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2220 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2224 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2226 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2230 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2231 struct kvm_sregs *sregs)
2233 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2234 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2235 restore_access_regs(vcpu->run->s.regs.acrs);
2239 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2240 struct kvm_sregs *sregs)
2242 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2243 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2247 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2249 /* make sure the new values will be lazily loaded */
2251 if (test_fp_ctl(fpu->fpc))
2253 current->thread.fpu.fpc = fpu->fpc;
2255 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
2257 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
2261 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2263 /* make sure we have the latest values */
2266 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
2268 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
2269 fpu->fpc = current->thread.fpu.fpc;
2273 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2277 if (!is_vcpu_stopped(vcpu))
2280 vcpu->run->psw_mask = psw.mask;
2281 vcpu->run->psw_addr = psw.addr;
2286 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2287 struct kvm_translation *tr)
2289 return -EINVAL; /* not implemented yet */
2292 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2293 KVM_GUESTDBG_USE_HW_BP | \
2294 KVM_GUESTDBG_ENABLE)
2296 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2297 struct kvm_guest_debug *dbg)
2301 vcpu->guest_debug = 0;
2302 kvm_s390_clear_bp_data(vcpu);
2304 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2306 if (!sclp.has_gpere)
2309 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2310 vcpu->guest_debug = dbg->control;
2311 /* enforce guest PER */
2312 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2314 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2315 rc = kvm_s390_import_bp_data(vcpu, dbg);
2317 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2318 vcpu->arch.guestdbg.last_bp = 0;
2322 vcpu->guest_debug = 0;
2323 kvm_s390_clear_bp_data(vcpu);
2324 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
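/*
 * Enabling single-stepping from user space (sketch; "vcpu_fd" is an
 * assumption):
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */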
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
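
/*
 * Request handling: vcpu->requests carries asynchronous work (TLB flush,
 * IBS enable/disable, re-arming the ipte notifier for the prefix page, ...)
 * that must be applied on the vcpu thread before (re-)entering SIE. The
 * helpers below are invoked from vcpu_pre_run() further down in this file.
 */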
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;

		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}
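
/*
 * Set the guest TOD clock by recomputing the epoch (requested guest TOD
 * minus the host TOD). All vcpus are blocked while their SIE epoch fields
 * are updated, so no vcpu can run with a half-updated clock.
 */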
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
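
/*
 * Pfault token injection: a "start" token is delivered to the vcpu as a
 * PFAULT_INIT external interrupt, while completion is signalled to the VM
 * as a floating PFAULT_DONE interrupt. The async_pf callbacks below map
 * the generic async page fault events onto these two cases.
 */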
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to clean up
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
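
/*
 * vcpu_pre_run() does the host-side work required before each SIE entry:
 * async pfault completion housekeeping, saving gprs 14/15 into the SIE
 * block, machine check handling, interrupt delivery and processing of the
 * requests handled above. A non-zero return value cancels the SIE entry.
 */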
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
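
/*
 * vcpu_post_run() evaluates the SIE exit: intercepts are dispatched to
 * kvm_handle_sie_intercept() and, if unhandled, reflected to userspace as
 * KVM_EXIT_S390_SIEIC; host page faults either arm an async pfault or are
 * resolved synchronously; anything left is treated as a fault taken while
 * inside SIE (see vcpu_post_run_fault_in_sie() above).
 */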
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
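
/*
 * Inner run loop: alternate between host-side preparation and SIE entry
 * until an exit condition is hit (pending signal, guest debug exit, or an
 * error/userspace-exit code from the pre/post handlers). kvm->srcu is held
 * whenever we are not inside SIE so that memslot accesses stay protected.
 */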
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in the fault handler, there
		 * should be no uaccess between guest_enter and guest_exit.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
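
/*
 * sync_regs()/store_regs() implement the kvm_run register synchronization:
 * on entry, registers flagged in kvm_dirty_regs are copied from the kvm_run
 * area into the vcpu/SIE block state; on exit, the current state is copied
 * back so userspace always sees up-to-date values.
 */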
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64)) {
		struct runtime_instr_cb *riccb =
			(struct runtime_instr_cb *) &kvm_run->s.regs.riccb;

		if (riccb->valid)
			vcpu->arch.sie_block->ecb3 |= 0x01;
	}

	kvm_run->kvm_dirty_regs = 0;
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}
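
/*
 * Top-level KVM_RUN handler: sets up the signal mask and cpu state,
 * synchronizes registers, runs the vcpu via __vcpu_run() and translates
 * the result into a kvm_run exit reason for userspace.
 */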
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0x1200;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
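
/*
 * IBS handling for the START/STOP state machine. As the comments in
 * kvm_s390_vcpu_start()/stop() below suggest, IBS is used to speed up a
 * guest with exactly one running vcpu: it is enabled when only one vcpu is
 * left running and disabled as soon as a second one starts. The resulting
 * requests are processed in kvm_s390_handle_requests() above.
 */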
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
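
/*
 * KVM_S390_MEM_OP backend: reads or writes up to MEM_OP_MAX_SIZE bytes of
 * guest logical memory through a temporary kernel buffer, or merely checks
 * the access when KVM_S390_MEMOP_F_CHECK_ONLY is set. On an access
 * exception (r > 0), the pending program interrupt can optionally be
 * injected into the guest via KVM_S390_MEMOP_F_INJECT_EXCEPTION.
 */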
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
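
/* Dispatcher for the vcpu-scope ioctls of the s390 KVM implementation. */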
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
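
/*
 * sclp.hmfai encodes, in 2-bit fields, which parts of each facility-list
 * doubleword are usable by non-hypervisor ("nonhyp") programs; the helper
 * below turns field i into a bit mask that kvm_s390_init() ANDs into the
 * KVM facility list at module init time.
 */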
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c,
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");