/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */

#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
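/*
 * Each entry below surfaces one VCPU statistics counter in debugfs,
 * keyed by the field of the same name in struct kvm_vcpu's stat member.
 */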
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
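/*
 * The mask above caps the facility bits KVM will ever report to a guest;
 * kvm_arch_init_vm() ANDs it with the host's STFLE facility list.
 */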
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
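/*
 * With bit 8 of r0 set, PLO acts as a pure "test bit" query: it only
 * reports via the condition code whether the function selected by the
 * remaining bits of r0 is available, without performing a locked
 * operation. The loop below uses this to probe all 256 PLO functions.
 */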
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_AIS:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	default:
		r = 0;
	}
	return r;
}
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
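/*
 * Dirty tracking on s390 lives in the host page tables: the loop above
 * transfers the per-page dirty state collected by the gmap into the
 * memslot's dirty bitmap before user space reads (and clears) it.
 */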
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			kvm->arch.float_int.ais_enabled = 1;
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
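/*
 * CMMA (collaborative memory management assist) lets the guest tag
 * unused pages so the host can reclaim them: ENABLE may only flip
 * use_cmma before any VCPU exists, while CLR resets the per-page
 * states of an already running guest.
 */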
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
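/*
 * Enabling key wrapping above generates a fresh random wrapping key
 * mask; disabling clears it. Every VCPU is then kicked out of SIE so
 * that kvm_s390_vcpu_crypto_setup() can propagate the new CRYCB
 * settings before the next guest entry.
 */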
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
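/*
 * PQAP with function code 0x04 (QCI) stores the AP (crypto adapter)
 * configuration into a 128-byte buffer. The instruction has no mnemonic
 * wrapper, so it is issued by raw opcode below; a non-zero condition
 * code means the query failed or AP instructions are unavailable.
 */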
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
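	/*
	 * Stagger consecutive SCAs within their page by 16 bytes each
	 * (under kvm_lock, as sca_offset is shared between VMs), so the
	 * heavily used SCA fields of different VMs do not all land at
	 * the same page offset.
	 */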
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	kvm->arch.float_int.simm = 0;
	kvm->arch.float_int.nimm = 0;
	kvm->arch.float_int.ais_enabled = 0;
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
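/*
 * The basic SCA holds at most KVM_S390_BSCA_CPU_SLOTS VCPUs; to grow a
 * VM beyond that, it is transparently migrated to an extended SCA below.
 * All VCPUs are blocked and sca_lock is taken for writing while the
 * entries are copied and every SIE block is repointed, so no VCPU can
 * enter SIE with a stale SCA address.
 */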
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}

static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
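/*
 * Readers of the CPU timer may run on other CPUs concurrently with the
 * VCPU thread updating it, so cputm_seqcount is used: the reader below
 * retries until it observes a consistent pair of cputm and cputm_start.
 */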
/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	gmap_enable(vcpu->arch.enabled_gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}
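/*
 * CMMA uses a per-VCPU collection buffer (CBRLO, one zeroed page) in
 * which SIE records guest page-state changes; note that setting
 * ECB2_CMMA below goes together with clearing ECB2_PFMFI.
 */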
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	if (sclp.has_kss)
		atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}
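/*
 * PROG_BLOCK_SIE and PROG_REQUEST in prog20 are checked on the SIE entry
 * path: while either bit is set, the VCPU cannot (re)enter SIE. Pairing
 * the atomic flag update with exit_sie() therefore guarantees the VCPU
 * is out of SIE, or on its way out, before the caller proceeds.
 */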
static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;
	if (!sclp.has_gpere)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}
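/*
 * Illustrative only: how userspace would arm the debug support above
 * (vcpu_fd is assumed to be an open vcpu file descriptor):
 *
 *	struct kvm_guest_debug dbg = {};
 *
 *	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 *
 * A later call without KVM_GUESTDBG_ENABLE drops CPUSTAT_P again, as
 * implemented in the else branch above.
 */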
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);

	return 0;
}
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
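/*
 * Note on the arithmetic above: the SIE block epoch is the signed delta
 * that the hardware adds to the host TOD clock to produce the guest view,
 * i.e. guest_tod = host_tod + epoch. For example, a requested guest TOD
 * of 0x2000 with a host TOD of 0x3000 yields an epoch of -0x1000.
 */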
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
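/*
 * The two halves above implement the pseudo-page-fault handshake: an INIT
 * external interrupt tells the guest that the host has to resolve a fault
 * asynchronously (so the guest may schedule another task meanwhile), and
 * the matching DONE interrupt, carrying the same token, reports completion.
 */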
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to clean up
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
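	/*
	 * insn_length() decodes the instruction-length code from the two
	 * leftmost opcode bits: 00 -> 2 bytes, 01/10 -> 4 bytes, 11 -> 6
	 * bytes, so reading the first opcode byte is sufficient here.
	 */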
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in the fault handler, there must
		 * be no uaccess between guest_enter and guest_exit.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->valid &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}

	kvm_run->kvm_dirty_regs = 0;
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (kvm_run->immediate_exit)
		return -EINTR;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
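/*
 * Illustrative only: the canonical userspace run loop that drives the
 * ioctl above (vcpu_fd and a mmap'ed struct kvm_run *run are assumed):
 *
 *	while (1) {
 *		ioctl(vcpu_fd, KVM_RUN, 0);
 *		switch (run->exit_reason) {
 *		case KVM_EXIT_S390_SIEIC:
 *			// handle an intercept the kernel passed on
 *			break;
 *		case KVM_EXIT_INTR:
 *			// interrupted by a signal, just re-enter
 *			break;
 *		}
 *	}
 */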
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
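/*
 * Illustrative only: reading guest memory through the handler above
 * (vcpu_fd is assumed; error handling omitted):
 *
 *	char buf[256];
 *	struct kvm_s390_mem_op op = {};
 *
 *	op.gaddr = 0x10000;			// guest logical address
 *	op.size = sizeof(buf);
 *	op.op = KVM_S390_MEMOP_LOGICAL_READ;
 *	op.buf = (__u64)buf;
 *	op.ar = 0;				// access register 0
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */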
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	   segment boundary (1 MB). The memory in userland may be fragmented
	   across different vmas, and it is fine to mmap() and munmap()
	   ranges in this slot at any time after this call. */
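	/*
	 * Note: 0xfffff masks the low 20 bits, so the two checks below
	 * require the userspace address and the size to be multiples of
	 * the 1 MB segment size.
	 */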

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
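/*
 * Worked example for nonhyp_mask(): sclp.hmfai packs sixteen 2-bit fields,
 * and (hmfai << i * 2) >> 30 isolates field i. For a field value n the
 * result keeps the low 48 - 16 * n bits of 0x0000ffffffffffffUL, so n = 0
 * admits facility bits 16-63 of doubleword i while n = 3 admits none.
 */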
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");