/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/etr.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
	0xffe6fffbfcfdfc40UL,
	0x005e800000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

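/*
 * Note: kvm_s390_fac_list_mask is an upper bound on the facilities KVM
 * will virtualize. During VM creation it is ANDed with the host's STFLE
 * facility bits (see kvm_arch_init_vm() below), so a guest is never
 * offered a facility that either the hardware or KVM lacks.
 */
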
static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

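/*
 * Userspace usage sketch (illustrative, not part of this file; "vm_fd"
 * is a hypothetical VM file descriptor): the FLIC registered above is
 * instantiated per VM via KVM_CREATE_DEVICE:
 *
 *	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_FLIC };
 *	ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);
 *	// cd.fd then accepts the KVM_DEV_FLIC_* device attributes
 */
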
void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	default:
		r = 0;
	}
	return r;
}

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	down_read(&gmap->mm->mmap_sem);
	/* Loop over all guest pages; last_gfn is the first gfn past the slot */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (gmap_test_and_clear_dirty(address, gmap))
			mark_page_dirty(kvm, cur_gfn);
	}
	up_read(&gmap->mm->mmap_sem);
}

/* Section: vm related */

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

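/*
 * Userspace usage sketch (illustrative; "vm_fd" and "bitmap" are
 * hypothetical): one pass of dirty tracking for memory slot 0. The
 * caller provides a bitmap with one bit per page of the slot:
 *
 *	struct kvm_dirty_log log = { .slot = 0, .dirty_bitmap = bitmap };
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 */
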
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac->mask, 129);
			set_kvm_facility(kvm->arch.model.fac->list, 129);
			r = 0;
		} else
			r = -EINVAL;
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

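/*
 * Userspace usage sketch (illustrative; "vm_fd" is hypothetical): the VM
 * capabilities handled above are toggled with KVM_ENABLE_CAP on the VM
 * file descriptor, e.g.:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */
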
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.gmap->asce_end);
		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		/* enable CMMA only for z10 and later (EDAT_1) */
		ret = -EINVAL;
		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (new_limit > kvm->arch.gmap->asce_end)
			return -E2BIG;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			/* gmap_alloc will round the limit up */
			struct gmap *new = gmap_alloc(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_free(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *cur_vcpu;
	unsigned int vcpu_idx;
	u64 host_tod, gtod;
	int r;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	r = store_tod_clock(&host_tod);
	if (r)
		return r;

	mutex_lock(&kvm->lock);
	kvm->arch.epoch = gtod - host_tod;
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
	return 0;
}

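/*
 * The SIE hardware presents the guest TOD as host TOD plus the epoch,
 * so after the assignment above the guest observes
 * host_tod + (gtod - host_tod) = gtod, without KVM having to intercept
 * STORE CLOCK.
 */
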
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 host_tod, gtod;
	int r;

	r = store_tod_clock(&host_tod);
	if (r)
		return r;

	gtod = host_tod + kvm->arch.epoch;
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (atomic_read(&kvm->online_vcpus)) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
		       sizeof(struct cpuid));
		kvm->arch.model.ibc = proc->ibc;
		memcpy(kvm->arch.model.fac->list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	unsigned long curkey;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		curkey = get_guest_storage_key(current->mm, hva);
		if (IS_ERR_VALUE(curkey)) {
			r = curkey;
			goto out;
		}
		keys[i] = curkey;
	}

	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
			 sizeof(uint8_t) * args->count);
	if (r)
		r = -EFAULT;
out:
	kvfree(keys);
	return r;
}

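/*
 * Userspace usage sketch (illustrative; "vm_fd", "buf" and "cnt" are
 * hypothetical): reading the storage keys of the first cnt guest pages:
 *
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = cnt,
 *		.skeydata_addr = (__u64)(unsigned long)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */
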
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			goto out;
		}

		r = set_guest_storage_key(current->mm, hva,
					  (unsigned long)keys[i], 0);
		if (r)
			goto out;
	}
out:
	kvfree(keys);
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(2) && test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
{
	get_cpu_id(cpu_id);
	cpu_id->version = 0xff;
}

static int kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return 0;

	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
					 GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.crypto.crycb)
		return -ENOMEM;

	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));

	return 0;
}

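/*
 * Because the initial wrapping key masks are random, protected-key
 * encodings are unique per VM by default; the KVM_S390_VM_CRYPTO_*
 * attributes handled in kvm_s390_vm_set_crypto() let userspace
 * re-randomize or clear them later.
 */
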
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset = (sca_offset + 16) & 0x7f0;
	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	/*
	 * The architectural maximum amount of facilities is 16 kbit. To store
	 * this amount, 2 kbyte of memory is required. Thus we need a full
	 * page to hold the guest facility list (arch.model.fac->list) and the
	 * facility mask (arch.model.fac->mask). Its address size has to be
	 * 31 bits and word aligned.
	 */
	kvm->arch.model.fac =
		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.model.fac)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac->mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

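	/*
	 * Example: facility 129 (vector) is not set in
	 * kvm_s390_fac_list_mask, so it starts out cleared in both mask
	 * and list and is only offered to the guest on explicit request
	 * via KVM_CAP_S390_VECTOR_REGISTERS (see kvm_vm_ioctl_enable_cap()).
	 */
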
	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	if (kvm_s390_crypto_init(kvm) < 0)
		goto out_err;

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
	} else {
		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);

	return 0;
out_err:
	kfree(kvm->arch.crypto.crycb);
	free_page((unsigned long)kvm->arch.model.fac);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)(kvm->arch.sca));
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		clear_bit(63 - vcpu->vcpu_id,
			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
		    (__u64) vcpu->arch.sie_block)
			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	smp_mb();

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	kfree(vcpu->arch.guest_fpregs.fprs);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	free_page((unsigned long)kvm->arch.model.fac);
	free_page((unsigned long)(kvm->arch.sca));
	debug_unregister(kvm->arch.dbf);
	kfree(kvm->arch.crypto.crycb);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	KVM_EVENT(3, "vm 0x%p destroyed", kvm);
}

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	if (test_kvm_facility(vcpu->kvm, 129))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}

/*
 * Backs up the current FP/VX register save area on a particular
 * destination. Used to switch between different register save
 * areas.
 */
static inline void save_fpu_to(struct fpu *dst)
{
	dst->fpc = current->thread.fpu.fpc;
	dst->flags = current->thread.fpu.flags;
	dst->regs = current->thread.fpu.regs;
}

/*
 * Switches the FP/VX register save area from which register
 * contents are lazily restored.
 */
static inline void load_fpu_from(struct fpu *from)
{
	current->thread.fpu.fpc = from->fpc;
	current->thread.fpu.flags = from->flags;
	current->thread.fpu.regs = from->regs;
}

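/*
 * Together, save_fpu_to() and load_fpu_from() only switch which save
 * area backs current->thread.fpu; the register contents themselves are
 * transferred lazily by the low-level FPU code (see save_fpu_regs()).
 */
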
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Save host register state */
	save_fpu_regs();
	save_fpu_to(&vcpu->arch.host_fpregs);

	if (test_kvm_facility(vcpu->kvm, 129)) {
		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
		current->thread.fpu.flags = FPU_USE_VX;
		/*
		 * Use the register save area in the SIE-control block
		 * for register restore and save in kvm_arch_vcpu_put()
		 */
		current->thread.fpu.vxrs =
			(__vector128 *)&vcpu->run->s.regs.vrs;
		/* Always enable the vector extension for KVM */
		__ctl_set_vx();
	} else
		load_fpu_from(&vcpu->arch.guest_fpregs);

	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);

	save_fpu_regs();

	if (test_kvm_facility(vcpu->kvm, 129))
		/*
		 * kvm_arch_vcpu_load() set up the register save area to
		 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
		 * are already saved. Only the floating-point control must be
		 * stored.
		 */
		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	else
		save_fpu_to(&vcpu->arch.guest_fpregs);
	load_fpu_from(&vcpu->arch.host_fpregs);

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
}

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm = 0UL;
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm))
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;	/* enable CMMA interpretation */
	vcpu->arch.sie_block->ecb2 &= ~0x08;	/* disable PFMF interpretation */
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.cpu_id = model->cpu_id;
	vcpu->arch.sie_block->ibc = model->ibc;
	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	vcpu->arch.sie_block->ecb = 6;
	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	vcpu->arch.sie_block->ecb2 = 8;
	vcpu->arch.sie_block->eca = 0xC1002000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (id >= KVM_MAX_VCPUS)
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	vcpu->arch.sie_block->icpua = id;
	if (!kvm_is_ucontrol(kvm)) {
		if (!kvm->arch.sca) {
			WARN_ON_ONCE(1);
			goto out_free_cpu;
		}
		if (!kvm->arch.sca->cpu[id].sda)
			kvm->arch.sca->cpu[id].sda =
				(__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh =
			(__u32)(((__u64)kvm->arch.sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
	}

	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

	/*
	 * Allocate a save area for floating-point registers. If the vector
	 * extension is available, register contents are saved in the SIE
	 * control block. The allocated save area is still required in
	 * particular places, for example, in kvm_s390_vcpu_store_status().
	 */
	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
					       GFP_KERNEL);
	if (!vcpu->arch.guest_fpregs.fprs) {
		rc = -ENOMEM;
		goto out_free_sie_block;
	}

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}

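/*
 * Ordering matters here: kvm_make_request() publishes the request
 * before kvm_s390_vcpu_request() kicks the VCPU out of SIE, so
 * kvm_s390_handle_requests() is guaranteed to see the request before
 * the VCPU can re-enter the guest.
 */
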
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
	save_fpu_regs();
	load_fpu_from(&vcpu->arch.guest_fpregs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      kvm_s390_get_prefix(vcpu),
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}

/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

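/*
 * PFAULT_INIT ("page is missing") is delivered as a local interrupt to
 * the faulting VCPU, while the matching PFAULT_DONE ("page arrived") is
 * queued as a floating interrupt on the VM. This implements the
 * pseudo-page-fault handshake the guest sets up via DIAGNOSE 0x258.
 */
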
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to clean up
	 */
	return true;
}

1984 struct kvm_arch_async_pf arch;
1987 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1989 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1990 vcpu->arch.pfault_compare)
1992 if (psw_extint_disabled(vcpu))
1994 if (kvm_s390_vcpu_has_irq(vcpu, 0))
1996 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1998 if (!vcpu->arch.gmap->pfault_enabled)
2001 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2002 hva += current->thread.gmap_addr & ~PAGE_MASK;
2003 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2006 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}

static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	u8 opcode;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
	if (rc)
		return kvm_s390_inject_prog_cond(vcpu, rc);
	psw->addr = __rewind_psw(*psw, -insn_length(opcode));

	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	int rc = -1;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	if (exit_reason >= 0) {
		rc = 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		rc = -EREMOTE;

	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu)) {
			rc = 0;
		} else {
			gpa_t gpa = current->thread.gmap_addr;
			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
		}
	}

	if (rc == -1)
		rc = vcpu_post_run_fault_in_sie(vcpu);

	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);

	if (rc == 0) {
		if (kvm_is_ucontrol(vcpu->kvm))
			/* Don't exit for host interrupts. */
			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
		else
			rc = kvm_handle_sie_intercept(vcpu);
	}

	return rc;
}

static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected.
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in the fault handler, there
		 * should be no uaccess between guest_enter and guest_exit.
		 */
		local_irq_disable();
		__kvm_guest_enter();
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__kvm_guest_exit();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}

static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	kvm_run->kvm_dirty_regs = 0;
}

static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}

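/*
 * sync_regs() and store_regs() are intentionally symmetric: every piece
 * of kvm_run state pushed into the SIE block before the run loop is
 * copied back afterwards, so userspace always sees a consistent view.
 */
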
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EOPNOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* intercept was handled, but userspace support is needed;
		 * kvm_run has been prepared by the handler */
		rc = 0;
	}

	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	unsigned int px;
	u64 clkcomp;
	int rc;

	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = SAVE_AREA_BASE;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
	}
	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
			     vcpu->arch.guest_fpregs.fprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
			      &vcpu->arch.sie_block->gpsw, 16);
	px = kvm_s390_get_prefix(vcpu);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
			      &px, 4);
	rc |= write_guest_abs(vcpu,
			      gpa + offsetof(struct save_area, fp_ctrl_reg),
			      &vcpu->arch.guest_fpregs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
			      &vcpu->arch.sie_block->todpr, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
			      &vcpu->arch.sie_block->cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

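/*
 * Note the "ckc >> 8" above: the save-area field is written in the
 * architected store-status format for the clock comparator, which holds
 * bits 0-55 of the comparator, hence the value is shifted right by 8
 * before it is stored.
 */
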
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	if (test_kvm_facility(vcpu->kvm, 129)) {
		/*
		 * If the vector extension is available, the vector registers
		 * which overlap with floating-point registers are saved in
		 * the SIE-control block. Hence, extract the floating-point
		 * registers and the FPC value and store them in the
		 * guest_fpregs structure.
		 */
		WARN_ON(!is_vx_task(current));	/* XXX remove later */
		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
				 current->thread.fpu.vxrs);
	} else
		save_fpu_to(&vcpu->arch.guest_fpregs);
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}

int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;

	/*
	 * The guest VXRS are in the host VXRs due to the lazy
	 * copying in vcpu load/put. We can simply call save_fpu_regs()
	 * to save the current register state because we are in the
	 * middle of a load/put cycle.
	 *
	 * Let's update our copies before we save it into the save area.
	 */
	save_fpu_regs();

	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
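/*
 * Note on the helpers above: the kvm_check_request() call consumes a
 * still-pending request for the opposite operation, so a VCPU can never
 * see an ENABLE and a DISABLE request queued at the same time.
 */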
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
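/*
 * IBS policy on the start path, summarized: starting the first VCPU
 * (started_vcpus == 0) enables IBS on it, while starting a second one
 * (started_vcpus == 1) disables IBS on all VCPUs, as the facility only
 * pays off while a single VCPU is running.
 */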
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
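/*
 * The stop path mirrors the start path: if exactly one started VCPU
 * remains after this one stopped, IBS is enabled again on that last
 * running VCPU.
 */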
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
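/*
 * Usage sketch (an assumption): userspace enables CSS support through the
 * generic capability ioctl on the VCPU fd; flags and args stay zero for
 * this capability, and "vcpu_fd" is a placeholder name.
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_CSS_SUPPORT,
 *	};
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */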
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
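/*
 * Usage sketch (an assumption): reading 256 bytes of guest memory from
 * userspace; "vcpu_fd" is a placeholder for the VCPU file descriptor.
 *
 *	__u8 buf[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr	= 0x10000,
 *		.size	= sizeof(buf),
 *		.op	= KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf	= (__u64)(unsigned long)buf,
 *		.ar	= 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */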
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
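/*
 * Sizing note for the two IRQ-state ioctls above: a valid irq_state.len
 * is a non-zero multiple of sizeof(struct kvm_s390_irq), bounded by
 * VCPU_IRQS_MAX_BUF, which is dimensioned for the maximum number of
 * VCPUs plus the local interrupts a single VCPU can have pending.
 */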
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
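/*
 * Usage sketch (an assumption): for user-controlled VMs, userspace
 * reaches this fault handler by mmap()ing the VCPU fd at the SIE page
 * offset, yielding a mapping of the SIE control block; "vcpu_fd" is a
 * placeholder name.
 *
 *	void *sie = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			 vcpu_fd, KVM_S390_SIE_PAGE_OFFSET * 4096);
 */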
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	   segment boundary (1MB). The memory in userland may be fragmented
	   across different vmas, and it is fine to mmap() and munmap() within
	   this slot at any time after this call. */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	return 0;
}
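/*
 * Worked example for the checks above: 0xffffful masks the low 20 bits,
 * so both the userspace address and the size must be 1 MB (segment)
 * aligned. A 16 MB slot at userspace address 0x80000000 passes; a slot
 * of 16 MB + 4 KB is rejected with -EINVAL.
 */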
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
static int __init kvm_s390_init(void)
{
	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");