/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */

#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
(KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
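/* One debugfs entry per vcpu statistics counter, referenced via VCPU_STAT. */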
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "userspace_handled", VCPU_STAT(exit_userspace) },
{ "exit_null", VCPU_STAT(exit_null) },
{ "exit_validity", VCPU_STAT(exit_validity) },
{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
{ "exit_external_request", VCPU_STAT(exit_external_request) },
{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
{ "exit_instruction", VCPU_STAT(exit_instruction) },
{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
{ "instruction_spx", VCPU_STAT(instruction_spx) },
{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
{ "instruction_stap", VCPU_STAT(instruction_stap) },
{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
{ "instruction_essa", VCPU_STAT(instruction_essa) },
{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
{ "diagnose_10", VCPU_STAT(diagnose_10) },
{ "diagnose_44", VCPU_STAT(diagnose_44) },
{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
{ "diagnose_258", VCPU_STAT(diagnose_258) },
{ "diagnose_308", VCPU_STAT(diagnose_308) },
{ "diagnose_500", VCPU_STAT(diagnose_500) },
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
0xffe6fffbfcfdfc40UL,
0x005e800000000000UL,

unsigned long kvm_s390_fac_list_mask_size(void)
BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
return ARRAY_SIZE(kvm_s390_fac_list_mask);

static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
/* every s390 is virtualization enabled ;-) */

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
struct kvm_vcpu *vcpu;
unsigned long long *delta = v;

list_for_each_entry(kvm, &vm_list, vm_list) {
kvm->arch.epoch -= *delta;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.sie_block->epoch -= *delta;

static struct notifier_block kvm_clock_notifier = {
.notifier_call = kvm_clock_sync,

int kvm_arch_hardware_setup(void)
gmap_notifier.notifier_call = kvm_gmap_notifier;
gmap_register_ipte_notifier(&gmap_notifier);
atomic_notifier_chain_register(&s390_epoch_delta_notifier,
&kvm_clock_notifier);

void kvm_arch_hardware_unsetup(void)
gmap_unregister_ipte_notifier(&gmap_notifier);
atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
&kvm_clock_notifier);

int kvm_arch_init(void *opaque)
kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));

if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
debug_unregister(kvm_s390_dbf);

/* Register floating interrupt controller interface. */
return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
void kvm_arch_exit(void)
debug_unregister(kvm_s390_dbf);

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
if (ioctl == KVM_S390_ENABLE_SIE)
return s390_enable_sie();

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_PSW:
case KVM_CAP_S390_GMAP:
case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
case KVM_CAP_S390_UCONTROL:
case KVM_CAP_ASYNC_PF:
case KVM_CAP_SYNC_REGS:
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_S390_CSS_SUPPORT:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_S390_IRQCHIP:
case KVM_CAP_VM_ATTRIBUTES:
case KVM_CAP_MP_STATE:
case KVM_CAP_S390_INJECT_IRQ:
case KVM_CAP_S390_USER_SIGP:
case KVM_CAP_S390_USER_STSI:
case KVM_CAP_S390_SKEYS:
case KVM_CAP_S390_IRQ_STATE:
case KVM_CAP_S390_MEM_OP:
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
case KVM_CAP_NR_MEMSLOTS:
r = KVM_USER_MEM_SLOTS;
case KVM_CAP_S390_COW:
r = MACHINE_HAS_ESOP;
case KVM_CAP_S390_VECTOR_REGISTERS:
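/*
 * Transfer the dirty state of all pages in a memslot from the gmap to
 * KVM's dirty bitmap. Runs under mmap_sem so the host mapping cannot
 * change while the gmap dirty bits are tested and cleared.
 */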
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
struct kvm_memory_slot *memslot)
gfn_t cur_gfn, last_gfn;
unsigned long address;
struct gmap *gmap = kvm->arch.gmap;

down_read(&gmap->mm->mmap_sem);
/* Loop over all guest pages */
last_gfn = memslot->base_gfn + memslot->npages;
for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
address = gfn_to_hva_memslot(memslot, cur_gfn);

if (gmap_test_and_clear_dirty(address, gmap))
mark_page_dirty(kvm, cur_gfn);

up_read(&gmap->mm->mmap_sem);

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log)
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;

mutex_lock(&kvm->slots_lock);

if (log->slot >= KVM_USER_MEM_SLOTS)

slots = kvm_memslots(kvm);
memslot = id_to_memslot(slots, log->slot);

if (!memslot->dirty_bitmap)

kvm_s390_sync_dirty_log(kvm, memslot);
r = kvm_get_dirty_log(kvm, log, &is_dirty);

/* Clear the dirty log */
n = kvm_dirty_bitmap_bytes(memslot);
memset(memslot->dirty_bitmap, 0, n);

mutex_unlock(&kvm->slots_lock);
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
case KVM_CAP_S390_IRQCHIP:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
kvm->arch.use_irqchip = 1;
case KVM_CAP_S390_USER_SIGP:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
kvm->arch.user_sigp = 1;
case KVM_CAP_S390_VECTOR_REGISTERS:
mutex_lock(&kvm->lock);
if (atomic_read(&kvm->online_vcpus)) {
} else if (MACHINE_HAS_VX) {
set_kvm_facility(kvm->arch.model.fac->mask, 129);
set_kvm_facility(kvm->arch.model.fac->list, 129);
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
r ? "(not available)" : "(success)");
case KVM_CAP_S390_USER_STSI:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
kvm->arch.user_stsi = 1;
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_MEM_LIMIT_SIZE:
VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
kvm->arch.gmap->asce_end);
if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_MEM_ENABLE_CMMA:
/* enable CMMA only for z10 and later (EDAT_1) */
if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)

VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
mutex_lock(&kvm->lock);
if (atomic_read(&kvm->online_vcpus) == 0) {
kvm->arch.use_cmma = 1;
mutex_unlock(&kvm->lock);
case KVM_S390_VM_MEM_CLR_CMMA:
if (!kvm->arch.use_cmma)

VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
mutex_lock(&kvm->lock);
idx = srcu_read_lock(&kvm->srcu);
s390_reset_cmma(kvm->arch.gmap->mm);
srcu_read_unlock(&kvm->srcu, idx);
mutex_unlock(&kvm->lock);
case KVM_S390_VM_MEM_LIMIT_SIZE: {
unsigned long new_limit;

if (kvm_is_ucontrol(kvm))
if (get_user(new_limit, (u64 __user *)attr->addr))
if (new_limit > kvm->arch.gmap->asce_end)

mutex_lock(&kvm->lock);
if (atomic_read(&kvm->online_vcpus) == 0) {
/* gmap_alloc will round the limit up */
struct gmap *new = gmap_alloc(current->mm, new_limit);

gmap_free(kvm->arch.gmap);
kvm->arch.gmap = new;
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
struct kvm_vcpu *vcpu;

if (!test_kvm_facility(kvm, 76))

mutex_lock(&kvm->lock);
switch (attr->attr) {
case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
kvm->arch.crypto.crycb->aes_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
kvm->arch.crypto.aes_kw = 1;
VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
kvm->arch.crypto.crycb->dea_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
kvm->arch.crypto.dea_kw = 1;
VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
kvm->arch.crypto.aes_kw = 0;
memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
kvm->arch.crypto.dea_kw = 0;
memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
mutex_unlock(&kvm->lock);

kvm_for_each_vcpu(i, vcpu, kvm) {
kvm_s390_vcpu_crypto_setup(vcpu);

mutex_unlock(&kvm->lock);
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
if (copy_from_user(&gtod_high, (void __user *)attr->addr,

VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))

kvm_s390_set_tod_clock(kvm, gtod);
VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_TOD_HIGH:
ret = kvm_s390_set_tod_high(kvm, attr);
case KVM_S390_VM_TOD_LOW:
ret = kvm_s390_set_tod_low(kvm, attr);

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
if (copy_to_user((void __user *)attr->addr, &gtod_high,
VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
gtod = kvm_s390_get_tod_clock_fast(kvm);
if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_TOD_HIGH:
ret = kvm_s390_get_tod_high(kvm, attr);
case KVM_S390_VM_TOD_LOW:
ret = kvm_s390_get_tod_low(kvm, attr);
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
struct kvm_s390_vm_cpu_processor *proc;

mutex_lock(&kvm->lock);
if (atomic_read(&kvm->online_vcpus)) {

proc = kzalloc(sizeof(*proc), GFP_KERNEL);

if (!copy_from_user(proc, (void __user *)attr->addr,
memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
sizeof(struct cpuid));
kvm->arch.model.ibc = proc->ibc;
memcpy(kvm->arch.model.fac->list, proc->fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);

mutex_unlock(&kvm->lock);

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_CPU_PROCESSOR:
ret = kvm_s390_set_processor(kvm, attr);

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
struct kvm_s390_vm_cpu_processor *proc;

proc = kzalloc(sizeof(*proc), GFP_KERNEL);

memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
proc->ibc = kvm->arch.model.ibc;
memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
struct kvm_s390_vm_cpu_machine *mach;

mach = kzalloc(sizeof(*mach), GFP_KERNEL);

get_cpu_id((struct cpuid *) &mach->cpuid);
mach->ibc = sclp.ibc;
memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_CPU_PROCESSOR:
ret = kvm_s390_get_processor(kvm, attr);
case KVM_S390_VM_CPU_MACHINE:
ret = kvm_s390_get_machine(kvm, attr);
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->group) {
case KVM_S390_VM_MEM_CTRL:
ret = kvm_s390_set_mem_control(kvm, attr);
case KVM_S390_VM_TOD:
ret = kvm_s390_set_tod(kvm, attr);
case KVM_S390_VM_CPU_MODEL:
ret = kvm_s390_set_cpu_model(kvm, attr);
case KVM_S390_VM_CRYPTO:
ret = kvm_s390_vm_set_crypto(kvm, attr);

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->group) {
case KVM_S390_VM_MEM_CTRL:
ret = kvm_s390_get_mem_control(kvm, attr);
case KVM_S390_VM_TOD:
ret = kvm_s390_get_tod(kvm, attr);
case KVM_S390_VM_CPU_MODEL:
ret = kvm_s390_get_cpu_model(kvm, attr);

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->group) {
case KVM_S390_VM_MEM_CTRL:
switch (attr->attr) {
case KVM_S390_VM_MEM_ENABLE_CMMA:
case KVM_S390_VM_MEM_CLR_CMMA:
case KVM_S390_VM_MEM_LIMIT_SIZE:
case KVM_S390_VM_TOD:
switch (attr->attr) {
case KVM_S390_VM_TOD_LOW:
case KVM_S390_VM_TOD_HIGH:
case KVM_S390_VM_CPU_MODEL:
switch (attr->attr) {
case KVM_S390_VM_CPU_PROCESSOR:
case KVM_S390_VM_CPU_MACHINE:
case KVM_S390_VM_CRYPTO:
switch (attr->attr) {
case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
unsigned long curkey;

if (args->flags != 0)

/* Is this guest using storage keys? */
if (!mm_use_skey(current->mm))
return KVM_S390_GET_SKEYS_NONE;

/* Enforce sane limit on memory allocation */
if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)

keys = kmalloc_array(args->count, sizeof(uint8_t),
GFP_KERNEL | __GFP_NOWARN);
keys = vmalloc(sizeof(uint8_t) * args->count);

for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
if (kvm_is_error_hva(hva)) {

curkey = get_guest_storage_key(current->mm, hva);
if (IS_ERR_VALUE(curkey)) {

r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
sizeof(uint8_t) * args->count);

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
if (args->flags != 0)

/* Enforce sane limit on memory allocation */
if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)

keys = kmalloc_array(args->count, sizeof(uint8_t),
GFP_KERNEL | __GFP_NOWARN);
keys = vmalloc(sizeof(uint8_t) * args->count);

r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
sizeof(uint8_t) * args->count);

/* Enable storage key handling for the guest */
r = s390_enable_skey();

for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
if (kvm_is_error_hva(hva)) {

/* Lowest order bit is reserved */
if (keys[i] & 0x01) {

r = set_guest_storage_key(current->mm, hva,
(unsigned long)keys[i], 0);
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
struct kvm_device_attr attr;

case KVM_S390_INTERRUPT: {
struct kvm_s390_interrupt s390int;

if (copy_from_user(&s390int, argp, sizeof(s390int)))
r = kvm_s390_inject_vm(kvm, &s390int);
case KVM_ENABLE_CAP: {
struct kvm_enable_cap cap;

if (copy_from_user(&cap, argp, sizeof(cap)))
r = kvm_vm_ioctl_enable_cap(kvm, &cap);
case KVM_CREATE_IRQCHIP: {
struct kvm_irq_routing_entry routing;

if (kvm->arch.use_irqchip) {
/* Set up dummy routing. */
memset(&routing, 0, sizeof(routing));
r = kvm_set_irq_routing(kvm, &routing, 0, 0);
case KVM_SET_DEVICE_ATTR: {
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
r = kvm_s390_vm_set_attr(kvm, &attr);
case KVM_GET_DEVICE_ATTR: {
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
r = kvm_s390_vm_get_attr(kvm, &attr);
case KVM_HAS_DEVICE_ATTR: {
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
r = kvm_s390_vm_has_attr(kvm, &attr);
case KVM_S390_GET_SKEYS: {
struct kvm_s390_skeys args;

if (copy_from_user(&args, argp,
sizeof(struct kvm_s390_skeys)))
r = kvm_s390_get_skeys(kvm, &args);
case KVM_S390_SET_SKEYS: {
struct kvm_s390_skeys args;

if (copy_from_user(&args, argp,
sizeof(struct kvm_s390_skeys)))
r = kvm_s390_set_skeys(kvm, &args);
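/*
 * Query the AP (adjunct processor) configuration via the PQAP(QCI)
 * instruction; the 128-byte config block is filled by the machine and
 * the condition code is returned.
 */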
static int kvm_s390_query_ap_config(u8 *config)
u32 fcn_code = 0x04000000UL;

memset(config, 0, 128);
".long 0xb2af0000\n" /* PQAP(QCI) */
: "r" (fcn_code), "r" (config)
: "cc", "0", "2", "memory"

static int kvm_s390_apxa_installed(void)
if (test_facility(2) && test_facility(12)) {
cc = kvm_s390_query_ap_config(config);

pr_err("PQAP(QCI) failed with cc=%d", cc);
return config[0] & 0x40;
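/*
 * The CRYCB descriptor carries the address of the crypto control
 * block; its format bits select FORMAT2 when APXA is installed and
 * FORMAT1 otherwise.
 */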
static void kvm_s390_set_crycb_format(struct kvm *kvm)
kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

if (kvm_s390_apxa_installed())
kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;

static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
cpu_id->version = 0xff;

static int kvm_s390_crypto_init(struct kvm *kvm)
if (!test_kvm_facility(kvm, 76))

kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
GFP_KERNEL | GFP_DMA);
if (!kvm->arch.crypto.crycb)

kvm_s390_set_crycb_format(kvm);

/* Enable AES/DEA protected key functions by default */
kvm->arch.crypto.aes_kw = 1;
kvm->arch.crypto.dea_kw = 1;
get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
char debug_name[16];
static unsigned long sca_offset;

#ifdef CONFIG_KVM_S390_UCONTROL
if (type & ~KVM_VM_S390_UCONTROL)
if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))

rc = s390_enable_sie();

kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);

spin_lock(&kvm_lock);
if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
kvm->arch.sca = (struct bsca_block *)
((char *) kvm->arch.sca + sca_offset);
spin_unlock(&kvm_lock);

sprintf(debug_name, "kvm-%u", current->pid);

kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));

/*
 * The architectural maximum amount of facilities is 16 kbit. To store
 * this amount, 2 kbyte of memory is required. Thus we need a full
 * page to hold the guest facility list (arch.model.fac->list) and the
 * facility mask (arch.model.fac->mask). Its address size has to be
 * 31 bits and word aligned.
 */
kvm->arch.model.fac =
(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
if (!kvm->arch.model.fac)

/* Populate the facility mask initially. */
memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
if (i < kvm_s390_fac_list_mask_size())
kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
kvm->arch.model.fac->mask[i] = 0UL;

/* Populate the facility list initially. */
memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);

kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
kvm->arch.model.ibc = sclp.ibc & 0x0fff;

if (kvm_s390_crypto_init(kvm) < 0)

spin_lock_init(&kvm->arch.float_int.lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++)
INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
init_waitqueue_head(&kvm->arch.ipte_wq);
mutex_init(&kvm->arch.ipte_mutex);

debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "vm created with type %lu", type);

if (type & KVM_VM_S390_UCONTROL) {
kvm->arch.gmap = NULL;
kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
if (!kvm->arch.gmap)
kvm->arch.gmap->private = kvm;
kvm->arch.gmap->pfault_enabled = 0;

kvm->arch.css_support = 0;
kvm->arch.use_irqchip = 0;
kvm->arch.epoch = 0;

spin_lock_init(&kvm->arch.start_stop_lock);
KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);

kfree(kvm->arch.crypto.crycb);
free_page((unsigned long)kvm->arch.model.fac);
debug_unregister(kvm->arch.dbf);
free_page((unsigned long)(kvm->arch.sca));
KVM_EVENT(3, "creation of vm failed: %d", rc);
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
kvm_s390_clear_local_irqs(vcpu);
kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_is_ucontrol(vcpu->kvm))

if (kvm_is_ucontrol(vcpu->kvm))
gmap_free(vcpu->arch.gmap);

if (vcpu->kvm->arch.use_cmma)
kvm_s390_vcpu_unsetup_cmma(vcpu);
free_page((unsigned long)(vcpu->arch.sie_block));

kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);

static void kvm_free_vcpus(struct kvm *kvm)
struct kvm_vcpu *vcpu;

kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arch_vcpu_destroy(vcpu);

mutex_lock(&kvm->lock);
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
kvm->vcpus[i] = NULL;

atomic_set(&kvm->online_vcpus, 0);
mutex_unlock(&kvm->lock);

void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_free_vcpus(kvm);
free_page((unsigned long)kvm->arch.model.fac);
free_page((unsigned long)(kvm->arch.sca));
debug_unregister(kvm->arch.dbf);
kfree(kvm->arch.crypto.crycb);
if (!kvm_is_ucontrol(kvm))
gmap_free(kvm->arch.gmap);
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
KVM_EVENT(3, "vm 0x%p destroyed", kvm);

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
if (!vcpu->arch.gmap)
vcpu->arch.gmap->private = vcpu->kvm;

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
struct bsca_block *sca = vcpu->kvm->arch.sca;

clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block)
sca->cpu[vcpu->vcpu_id].sda = 0;
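/*
 * Publish the vcpu's SIE control block in the system control area (SCA):
 * record the SDA entry, point the SIE block at the SCA origin (split
 * into high and low parts), and set the vcpu's bit in the mcn mask.
 */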
static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm,
struct bsca_block *sca = kvm->arch.sca;

if (!sca->cpu[id].sda)
sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
set_bit_inv(id, (unsigned long *) &sca->mcn);

static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
return id < KVM_MAX_VCPUS;

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
if (test_kvm_facility(vcpu->kvm, 129))
vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;

if (kvm_is_ucontrol(vcpu->kvm))
return __kvm_ucontrol_vcpu_init(vcpu);
/*
 * Backs up the current FP/VX register save area to a particular
 * destination. Used to switch between different register save
 * areas.
 */
static inline void save_fpu_to(struct fpu *dst)
dst->fpc = current->thread.fpu.fpc;
dst->regs = current->thread.fpu.regs;

/*
 * Switches the FP/VX register save area from which to lazily
 * restore register contents.
 */
static inline void load_fpu_from(struct fpu *from)
current->thread.fpu.fpc = from->fpc;
current->thread.fpu.regs = from->regs;

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
/* Save host register state */
save_fpu_to(&vcpu->arch.host_fpregs);

if (test_kvm_facility(vcpu->kvm, 129)) {
current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
/*
 * Use the register save area in the SIE-control block
 * for register restore and save in kvm_arch_vcpu_put()
 */
current->thread.fpu.vxrs =
(__vector128 *)&vcpu->run->s.regs.vrs;
load_fpu_from(&vcpu->arch.guest_fpregs);

if (test_fp_ctl(current->thread.fpu.fpc))
/* User space provided an invalid FPC, let's clear it */
current->thread.fpu.fpc = 0;

save_access_regs(vcpu->arch.host_acrs);
restore_access_regs(vcpu->run->s.regs.acrs);
gmap_enable(vcpu->arch.gmap);
atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
gmap_disable(vcpu->arch.gmap);

if (test_kvm_facility(vcpu->kvm, 129))
/*
 * kvm_arch_vcpu_load() set up the register save area to
 * &vcpu->run->s.regs.vrs and, thus, the vector registers
 * are already saved. Only the floating-point control must be
 * saved.
 */
vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
save_fpu_to(&vcpu->arch.guest_fpregs);
load_fpu_from(&vcpu->arch.host_fpregs);

save_access_regs(vcpu->run->s.regs.acrs);
restore_access_regs(vcpu->arch.host_acrs);
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
/* this equals initial cpu reset in pop, but we don't switch to ESA */
vcpu->arch.sie_block->gpsw.mask = 0UL;
vcpu->arch.sie_block->gpsw.addr = 0UL;
kvm_s390_set_prefix(vcpu, 0);
vcpu->arch.sie_block->cputm = 0UL;
vcpu->arch.sie_block->ckc = 0UL;
vcpu->arch.sie_block->todpr = 0;
memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
vcpu->arch.sie_block->gcr[0] = 0xE0UL;
vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
vcpu->arch.guest_fpregs.fpc = 0;
asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.sie_block->pp = 0;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
kvm_s390_vcpu_stop(vcpu);
kvm_s390_clear_local_irqs(vcpu);

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
mutex_lock(&vcpu->kvm->lock);
vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
mutex_unlock(&vcpu->kvm->lock);
if (!kvm_is_ucontrol(vcpu->kvm))
vcpu->arch.gmap = vcpu->kvm->arch.gmap;

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
if (!test_kvm_facility(vcpu->kvm, 76))

vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

if (vcpu->kvm->arch.crypto.aes_kw)
vcpu->arch.sie_block->ecb3 |= ECB3_AES;
if (vcpu->kvm->arch.crypto.dea_kw)
vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
free_page(vcpu->arch.sie_block->cbrlo);
vcpu->arch.sie_block->cbrlo = 0;

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
if (!vcpu->arch.sie_block->cbrlo)

vcpu->arch.sie_block->ecb2 |= 0x80;
vcpu->arch.sie_block->ecb2 &= ~0x08;
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

vcpu->arch.cpu_id = model->cpu_id;
vcpu->arch.sie_block->ibc = model->ibc;
vcpu->arch.sie_block->fac = (int) (long) model->fac->list;

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
if (test_kvm_facility(vcpu->kvm, 78))
atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
else if (test_kvm_facility(vcpu->kvm, 8))
atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

kvm_s390_vcpu_setup_model(vcpu);

vcpu->arch.sie_block->ecb = 6;
if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
vcpu->arch.sie_block->ecb |= 0x10;

vcpu->arch.sie_block->ecb2 = 8;
vcpu->arch.sie_block->eca = 0xC1002000U;
vcpu->arch.sie_block->eca |= 1;
if (sclp.has_sigpif)
vcpu->arch.sie_block->eca |= 0x10000000U;
if (test_kvm_facility(vcpu->kvm, 129)) {
vcpu->arch.sie_block->eca |= 0x00020000;
vcpu->arch.sie_block->ecd |= 0x20000000;
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

if (vcpu->kvm->arch.use_cmma) {
rc = kvm_s390_vcpu_setup_cmma(vcpu);
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

kvm_s390_vcpu_crypto_setup(vcpu);

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
struct kvm_vcpu *vcpu;
struct sie_page *sie_page;

if (!sca_can_add_vcpu(kvm, id))

vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);

sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);

vcpu->arch.sie_block = &sie_page->sie_block;
vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

vcpu->arch.sie_block->icpua = id;
if (!kvm_is_ucontrol(kvm)) {
if (!kvm->arch.sca) {
sca_add_vcpu(vcpu, kvm, id);

spin_lock_init(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

/*
 * Allocate a save area for floating-point registers. If the vector
 * extension is available, register contents are saved in the SIE
 * control block. The allocated save area is still required in
 * particular places, for example, in kvm_s390_vcpu_store_status().
 */
vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
if (!vcpu->arch.guest_fpregs.fprs) {
goto out_free_sie_block;

rc = kvm_vcpu_init(vcpu, kvm, id);
goto out_free_sie_block;
VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
vcpu->arch.sie_block);
trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

free_page((unsigned long)(vcpu->arch.sie_block));
kmem_cache_free(kvm_vcpu_cache, vcpu);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return kvm_s390_vcpu_has_irq(vcpu, 0);

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
kvm_make_request(req, vcpu);
kvm_s390_vcpu_request(vcpu);

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
struct kvm *kvm = gmap->private;
struct kvm_vcpu *vcpu;

kvm_for_each_vcpu(i, vcpu, kvm) {
/* match against both prefix pages */
if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
/* kvm common code refers to this, but never calls it */

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
struct kvm_one_reg *reg)
case KVM_REG_S390_TODPR:
r = put_user(vcpu->arch.sie_block->todpr,
(u32 __user *)reg->addr);
case KVM_REG_S390_EPOCHDIFF:
r = put_user(vcpu->arch.sie_block->epoch,
(u64 __user *)reg->addr);
case KVM_REG_S390_CPU_TIMER:
r = put_user(vcpu->arch.sie_block->cputm,
(u64 __user *)reg->addr);
case KVM_REG_S390_CLOCK_COMP:
r = put_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFTOKEN:
r = put_user(vcpu->arch.pfault_token,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFCOMPARE:
r = put_user(vcpu->arch.pfault_compare,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFSELECT:
r = put_user(vcpu->arch.pfault_select,
(u64 __user *)reg->addr);
case KVM_REG_S390_PP:
r = put_user(vcpu->arch.sie_block->pp,
(u64 __user *)reg->addr);
case KVM_REG_S390_GBEA:
r = put_user(vcpu->arch.sie_block->gbea,
(u64 __user *)reg->addr);
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
struct kvm_one_reg *reg)
case KVM_REG_S390_TODPR:
r = get_user(vcpu->arch.sie_block->todpr,
(u32 __user *)reg->addr);
case KVM_REG_S390_EPOCHDIFF:
r = get_user(vcpu->arch.sie_block->epoch,
(u64 __user *)reg->addr);
case KVM_REG_S390_CPU_TIMER:
r = get_user(vcpu->arch.sie_block->cputm,
(u64 __user *)reg->addr);
case KVM_REG_S390_CLOCK_COMP:
r = get_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFTOKEN:
r = get_user(vcpu->arch.pfault_token,
(u64 __user *)reg->addr);
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
kvm_clear_async_pf_completion_queue(vcpu);
case KVM_REG_S390_PFCOMPARE:
r = get_user(vcpu->arch.pfault_compare,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFSELECT:
r = get_user(vcpu->arch.pfault_select,
(u64 __user *)reg->addr);
case KVM_REG_S390_PP:
r = get_user(vcpu->arch.sie_block->pp,
(u64 __user *)reg->addr);
case KVM_REG_S390_GBEA:
r = get_user(vcpu->arch.sie_block->gbea,
(u64 __user *)reg->addr);
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_initial_reset(vcpu);

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
restore_access_regs(vcpu->run->s.regs.acrs);

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
if (test_fp_ctl(fpu->fpc))
memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
vcpu->arch.guest_fpregs.fpc = fpu->fpc;
load_fpu_from(&vcpu->arch.guest_fpregs);

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
fpu->fpc = vcpu->arch.guest_fpregs.fpc;
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
if (!is_vcpu_stopped(vcpu))

vcpu->run->psw_mask = psw.mask;
vcpu->run->psw_addr = psw.addr;

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
return -EINVAL; /* not implemented yet */

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
KVM_GUESTDBG_USE_HW_BP | \
KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
vcpu->guest_debug = 0;
kvm_s390_clear_bp_data(vcpu);

if (dbg->control & ~VALID_GUESTDBG_FLAGS)

if (dbg->control & KVM_GUESTDBG_ENABLE) {
vcpu->guest_debug = dbg->control;
/* enforce guest PER */
atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
rc = kvm_s390_import_bp_data(vcpu, dbg);
atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
vcpu->arch.guestdbg.last_bp = 0;

vcpu->guest_debug = 0;
kvm_s390_clear_bp_data(vcpu);
atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
/* CHECK_STOP and LOAD are not supported yet */
return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
KVM_MP_STATE_OPERATING;

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
/* user space knows about this interface - let it control the state */
vcpu->kvm->arch.user_cpu_state_ctrl = 1;

switch (mp_state->mp_state) {
case KVM_MP_STATE_STOPPED:
kvm_s390_vcpu_stop(vcpu);
case KVM_MP_STATE_OPERATING:
kvm_s390_vcpu_start(vcpu);
case KVM_MP_STATE_LOAD:
case KVM_MP_STATE_CHECK_STOP:
/* fall through - CHECK_STOP and LOAD are not supported yet */

static bool ibs_enabled(struct kvm_vcpu *vcpu)
return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
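/*
 * Process pending vcpu requests before (re-)entering SIE: re-arm the
 * ipte notifier for the prefix page on MMU_RELOAD, invalidate ihcpu on
 * TLB_FLUSH, and toggle the IBS cpuflag for ENABLE/DISABLE_IBS.
 */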
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_request_handled(vcpu);
if (!vcpu->requests)

/*
 * We use MMU_RELOAD just to re-arm the ipte notifier for the
 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
 * This ensures that the ipte instruction for this request has
 * already finished. We might race against a second unmapper that
 * wants to set the blocking bit. Let's just retry the request loop.
 */
if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
rc = gmap_ipte_notify(vcpu->arch.gmap,
kvm_s390_get_prefix(vcpu),

if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
vcpu->arch.sie_block->ihcpu = 0xffff;

if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
if (!ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
atomic_or(CPUSTAT_IBS,
&vcpu->arch.sie_block->cpuflags);

if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
if (ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
atomic_andnot(CPUSTAT_IBS,
&vcpu->arch.sie_block->cpuflags);

/* nothing to do, just clear the request */
clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
struct kvm_vcpu *vcpu;

mutex_lock(&kvm->lock);
kvm->arch.epoch = tod - get_tod_clock();
kvm_s390_vcpu_block_all(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
vcpu->arch.sie_block->epoch = kvm->arch.epoch;
kvm_s390_vcpu_unblock_all(kvm);
mutex_unlock(&kvm->lock);
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
return gmap_fault(vcpu->arch.gmap, gpa,
writable ? FAULT_FLAG_WRITE : 0);
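/*
 * Inject a pfault token: the INIT notification goes to the vcpu as a
 * local interrupt, the DONE notification is injected into the VM as a
 * floating interrupt.
 */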
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
unsigned long token)
struct kvm_s390_interrupt inti;
struct kvm_s390_irq irq;

irq.u.ext.ext_params2 = token;
irq.type = KVM_S390_INT_PFAULT_INIT;
WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
inti.type = KVM_S390_INT_PFAULT_DONE;
inti.parm64 = token;
WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
/* s390 will always inject the page directly */

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
/*
 * s390 will always inject the page directly,
 * but we still want check_async_completion to clean up
 */

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
struct kvm_arch_async_pf arch;

if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
vcpu->arch.pfault_compare)
if (psw_extint_disabled(vcpu))
if (kvm_s390_vcpu_has_irq(vcpu, 0))
if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
if (!vcpu->arch.gmap->pfault_enabled)

hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
hva += current->thread.gmap_addr & ~PAGE_MASK;
if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))

rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
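/*
 * Prepare the vcpu for entering SIE: handle completed pfaults, deliver
 * pending interrupts, process requests, and patch in guest debugging
 * state where enabled.
 */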
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
/*
 * On s390 notifications for arriving pages will be delivered directly
 * to the guest but the housekeeping for completed pfaults is
 * handled outside the worker.
 */
kvm_check_async_pf_completion(vcpu);

memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);

if (test_cpu_flag(CIF_MCCK_PENDING))

if (!kvm_is_ucontrol(vcpu->kvm)) {
rc = kvm_s390_deliver_pending_interrupts(vcpu);

rc = kvm_s390_handle_requests(vcpu);

if (guestdbg_enabled(vcpu)) {
kvm_s390_backup_guest_per_regs(vcpu);
kvm_s390_patch_guest_per_regs(vcpu);

vcpu->arch.sie_block->icptcode = 0;
cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
trace_kvm_s390_sie_enter(vcpu, cpuflags);
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
psw_t *psw = &vcpu->arch.sie_block->gpsw;

VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
trace_kvm_s390_sie_fault(vcpu);

/*
 * We want to inject an addressing exception, which is defined as a
 * suppressing or terminating exception. However, since we came here
 * by a DAT access exception, the PSW still points to the faulting
 * instruction since DAT exceptions are nullifying. So we've got
 * to look up the current opcode to get the length of the instruction
 * to be able to forward the PSW.
 */
rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
return kvm_s390_inject_prog_cond(vcpu, rc);
psw->addr = __rewind_psw(*psw, -insn_length(opcode));

return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
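/*
 * Triage a SIE exit: intercepts (icptcode > 0) go to the intercept
 * handlers, non-fault exits count as exit_null, ucontrol translation
 * faults are reported to userspace, guest pfaults are resolved
 * asynchronously or faulted in, and anything else is a fault in SIE.
 */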
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

if (guestdbg_enabled(vcpu))
kvm_s390_restore_guest_per_regs(vcpu);

memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);

if (vcpu->arch.sie_block->icptcode > 0) {
int rc = kvm_handle_sie_intercept(vcpu);

if (rc != -EOPNOTSUPP)
vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
} else if (exit_reason != -EFAULT) {
vcpu->stat.exit_null++;
} else if (kvm_is_ucontrol(vcpu->kvm)) {
vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
vcpu->run->s390_ucontrol.trans_exc_code =
current->thread.gmap_addr;
vcpu->run->s390_ucontrol.pgm_code = 0x10;
} else if (current->thread.gmap_pfault) {
trace_kvm_s390_major_guest_pfault(vcpu);
current->thread.gmap_pfault = 0;
if (kvm_arch_setup_async_pf(vcpu))
return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);

return vcpu_post_run_fault_in_sie(vcpu);
static int __vcpu_run(struct kvm_vcpu *vcpu)
int rc, exit_reason;

/*
 * We try to hold kvm->srcu during most of vcpu_run (except when run-
 * ning the guest), so that memslots (and other stuff) are protected.
 */
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

rc = vcpu_pre_run(vcpu);

srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/*
 * As PF_VCPU will be used in the fault handler, there must be no
 * uaccess between guest_enter and guest_exit.
 */
local_irq_disable();
__kvm_guest_enter();
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
local_irq_disable();

vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

rc = vcpu_post_run(vcpu, exit_reason);
} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
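/*
 * Copy register state that userspace may have dirtied in kvm_run into
 * the SIE control block and the architecture-specific pfault fields
 * before entering the guest.
 */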
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
/* some control register changes require a tlb flush */
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
vcpu->arch.pfault_token = kvm_run->s.regs.pft;
vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
kvm_clear_async_pf_completion_queue(vcpu);
kvm_run->kvm_dirty_regs = 0;

static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
kvm_run->s.regs.pft = vcpu->arch.pfault_token;
kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (guestdbg_exit_pending(vcpu)) {
kvm_s390_prepare_debug_exit(vcpu);

if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
kvm_s390_vcpu_start(vcpu);
} else if (is_vcpu_stopped(vcpu)) {
pr_err_ratelimited("can't run stopped vcpu %d\n",

sync_regs(vcpu, kvm_run);

rc = __vcpu_run(vcpu);

if (signal_pending(current) && !rc) {
kvm_run->exit_reason = KVM_EXIT_INTR;

if (guestdbg_exit_pending(vcpu) && !rc) {
kvm_s390_prepare_debug_exit(vcpu);

if (rc == -EREMOTE) {
/* userspace support is needed, kvm_run has been prepared */

store_regs(vcpu, kvm_run);

if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);

vcpu->stat.exit_userspace++;
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
unsigned char archmode = 1;

if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
if (write_guest_abs(vcpu, 163, &archmode, 1))
gpa = SAVE_AREA_BASE;
} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
if (write_guest_real(vcpu, 163, &archmode, 1))
gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
vcpu->arch.guest_fpregs.fprs, 128);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
vcpu->run->s.regs.gprs, 128);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
&vcpu->arch.sie_block->gpsw, 16);
px = kvm_s390_get_prefix(vcpu);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
rc |= write_guest_abs(vcpu,
gpa + offsetof(struct save_area, fp_ctrl_reg),
&vcpu->arch.guest_fpregs.fpc, 4);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
&vcpu->arch.sie_block->todpr, 4);
clkcomp = vcpu->arch.sie_block->ckc >> 8;
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
&vcpu->run->s.regs.acrs, 64);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
&vcpu->arch.sie_block->gcr, 128);
return rc ? -EFAULT : 0;
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
/*
 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
 * copying in vcpu load/put. Let's update our copies before we save
 * them into the save area.
 */
if (test_kvm_facility(vcpu->kvm, 129)) {
/*
 * If the vector extension is available, the vector registers
 * which overlap with the floating-point registers are saved in
 * the SIE-control block. Hence, extract the floating-point
 * registers and the FPC value and store them in the
 * guest_fpregs structure.
 */
vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
current->thread.fpu.vxrs);
save_fpu_to(&vcpu->arch.guest_fpregs);
save_access_regs(vcpu->run->s.regs.acrs);

return kvm_s390_store_status_unloaded(vcpu, addr);

/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
/* Only bits 0-53 are used for address formation */
if (!(gpa & ~0x3ff))

return write_guest_abs(vcpu, gpa & ~0x3ff,
(void *)&vcpu->run->s.regs.vrs, 512);

int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
if (!test_kvm_facility(vcpu->kvm, 129))

/*
 * The guest VXRS are in the host VXRS due to the lazy
 * copying in vcpu load/put. We can simply call save_fpu_regs()
 * to save the current register state because we are in the
 * middle of a load/put cycle.
 *
 * Let's update our copies before we save it into the save area.
 */

return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
/* Clear any stale opposite request, then post the new IBS request. */
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		__disable_ibs_on_vcpu(vcpu);
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

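/*
 * Illustrative sketch (not part of the original file): userspace can request
 * the transitions above via the KVM_SET_MP_STATE vcpu ioctl, where (on this
 * kernel) KVM_MP_STATE_OPERATING is routed to kvm_s390_vcpu_start() and
 * KVM_MP_STATE_STOPPED to kvm_s390_vcpu_stop(). vcpu_fd is hypothetical.
 *
 *	struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_OPERATING };
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
 */
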
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		return 0;
	default:
		return -EINVAL;
	}
}

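/*
 * Illustrative sketch (not part of the original file): enabling the CSS
 * support capability handled above from userspace. The flags field must be
 * zero or the ioctl fails with -EINVAL. vcpu_fd is hypothetical.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
 *		perror("KVM_ENABLE_CAP");
 */
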
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;
	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, false);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0 && copy_to_user(uaddr, tmpbuf, mop->size))
			r = -EFAULT;
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, true);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}
	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

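/*
 * Illustrative sketch (not part of the original file): a logical read of
 * guest memory through the KVM_S390_MEM_OP ioctl serviced above. Setting
 * KVM_S390_MEMOP_F_CHECK_ONLY in .flags would only translate and check the
 * range without copying any data. vcpu_fd is hypothetical.
 *
 *	unsigned char buf[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x1000,		(guest logical address)
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (unsigned long)buf,
 *		.ar    = 0,			(access register number)
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 */
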
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = -EINVAL;
		if (s390int_to_s390irq(&s390int, &s390irq))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		r = -EFAULT;
		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap)))
			break;
		r = -EINVAL;
		if (!kvm_is_ucontrol(vcpu->kvm))
			break;
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		r = -EFAULT;
		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap)))
			break;
		r = -EINVAL;
		if (!kvm_is_ucontrol(vcpu->kvm))
			break;
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *)irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *)irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

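/*
 * Illustrative sketch (not part of the original file): saving and restoring
 * pending interrupts for migration with the two irq_state ioctls handled
 * above. On the set side, len must be a non-zero multiple of
 * sizeof(struct kvm_s390_irq) no larger than VCPU_IRQS_MAX_BUF; on success
 * the get ioctl returns the number of bytes copied. vcpu_fd is hypothetical.
 *
 *	struct kvm_s390_irq irqs[64];
 *	struct kvm_s390_irq_state st = {
 *		.buf = (unsigned long)irqs,
 *		.len = sizeof(irqs),
 *	};
 *	int n = ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &st);
 *		(transfer the first n bytes of irqs to the destination)
 *	st.len = n;
 *	ioctl(vcpu_fd, KVM_S390_SET_IRQ_STATE, &st);
 */
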
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

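/*
 * Illustrative sketch (not part of the original file): for user-controlled
 * VMs, the fault handler above lets userspace map the vcpu's SIE control
 * block by mmap()ing the vcpu fd at KVM_S390_SIE_PAGE_OFFSET pages.
 * vcpu_fd and page_size are hypothetical.
 *
 *	void *sie = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, vcpu_fd,
 *			 KVM_S390_SIE_PAGE_OFFSET * page_size);
 */
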
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/*
	 * A few sanity checks. Memory slots have to start and end on a
	 * segment boundary (1MB). The memory in userland may be fragmented
	 * into various different vmas, and it is fine to mmap() and munmap()
	 * within this slot at any time after this call.
	 */
	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;
	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	return 0;
}

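/*
 * Illustrative sketch (not part of the original file): a memslot layout that
 * passes the 1MB segment-alignment checks above. mmap() only guarantees page
 * alignment, so real code must ensure the address is 1MB-aligned (e.g. by
 * over-allocating and rounding up). vm_fd is hypothetical.
 *
 *	size_t len = 256 << 20;		(256MB, a multiple of 1MB)
 *	void *mem = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	struct kvm_userspace_memory_region reg = {
 *		.slot = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size = len,
 *		.userspace_addr = (unsigned long)mem,
 *	};
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &reg);
 */
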
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/*
	 * If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
}

static int __init kvm_s390_init(void)
{
	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");