git.karo-electronics.de (karo-tx-linux.git): arch/s390/kvm/kvm-s390.c
KVM: s390: don't switch to ESCA for ucontrol
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/etr.h>
32 #include <asm/pgtable.h>
33 #include <asm/nmi.h>
34 #include <asm/switch_to.h>
35 #include <asm/isc.h>
36 #include <asm/sclp.h>
37 #include "kvm-s390.h"
38 #include "gaccess.h"
39
40 #define KMSG_COMPONENT "kvm-s390"
41 #undef pr_fmt
42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
43
44 #define CREATE_TRACE_POINTS
45 #include "trace.h"
46 #include "trace-s390.h"
47
48 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
49 #define LOCAL_IRQS 32
50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
51                            (KVM_MAX_VCPUS + LOCAL_IRQS))
52
53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
54
55 struct kvm_stats_debugfs_item debugfs_entries[] = {
56         { "userspace_handled", VCPU_STAT(exit_userspace) },
57         { "exit_null", VCPU_STAT(exit_null) },
58         { "exit_validity", VCPU_STAT(exit_validity) },
59         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
60         { "exit_external_request", VCPU_STAT(exit_external_request) },
61         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
62         { "exit_instruction", VCPU_STAT(exit_instruction) },
63         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
64         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
65         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
68         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
69         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
70         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
71         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
72         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
73         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
74         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
75         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
76         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
77         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
78         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
79         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
80         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
81         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
82         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
83         { "instruction_spx", VCPU_STAT(instruction_spx) },
84         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
85         { "instruction_stap", VCPU_STAT(instruction_stap) },
86         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
87         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
88         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
89         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
90         { "instruction_essa", VCPU_STAT(instruction_essa) },
91         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
92         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
93         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
94         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
95         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
96         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
97         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
98         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
99         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
100         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
101         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
102         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
103         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
104         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
105         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
106         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
107         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
108         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
109         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
110         { "diagnose_10", VCPU_STAT(diagnose_10) },
111         { "diagnose_44", VCPU_STAT(diagnose_44) },
112         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
113         { "diagnose_258", VCPU_STAT(diagnose_258) },
114         { "diagnose_308", VCPU_STAT(diagnose_308) },
115         { "diagnose_500", VCPU_STAT(diagnose_500) },
116         { NULL }
117 };
118
119 /* upper facilities limit for kvm */
120 unsigned long kvm_s390_fac_list_mask[] = {
121         0xffe6fffbfcfdfc40UL,
122         0x005e800000000000UL,
123 };
124
125 unsigned long kvm_s390_fac_list_mask_size(void)
126 {
127         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
128         return ARRAY_SIZE(kvm_s390_fac_list_mask);
129 }
130
131 static struct gmap_notifier gmap_notifier;
132 debug_info_t *kvm_s390_dbf;
133
134 /* Section: not file related */
135 int kvm_arch_hardware_enable(void)
136 {
137         /* every s390 is virtualization enabled ;-) */
138         return 0;
139 }
140
141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
142
143 /*
144  * This callback is executed during stop_machine(). All CPUs are therefore
145  * temporarily stopped. In order not to change guest behavior, we have to
146  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
147  * so a CPU won't be stopped while calculating with the epoch.
148  */
149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
150                           void *v)
151 {
152         struct kvm *kvm;
153         struct kvm_vcpu *vcpu;
154         int i;
155         unsigned long long *delta = v;
156
157         list_for_each_entry(kvm, &vm_list, vm_list) {
158                 kvm->arch.epoch -= *delta;
159                 kvm_for_each_vcpu(i, vcpu, kvm) {
160                         vcpu->arch.sie_block->epoch -= *delta;
161                 }
162         }
163         return NOTIFY_OK;
164 }
165
166 static struct notifier_block kvm_clock_notifier = {
167         .notifier_call = kvm_clock_sync,
168 };
169
170 int kvm_arch_hardware_setup(void)
171 {
172         gmap_notifier.notifier_call = kvm_gmap_notifier;
173         gmap_register_ipte_notifier(&gmap_notifier);
174         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
175                                        &kvm_clock_notifier);
176         return 0;
177 }
178
179 void kvm_arch_hardware_unsetup(void)
180 {
181         gmap_unregister_ipte_notifier(&gmap_notifier);
182         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
183                                          &kvm_clock_notifier);
184 }
185
186 int kvm_arch_init(void *opaque)
187 {
188         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
189         if (!kvm_s390_dbf)
190                 return -ENOMEM;
191
192         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
193                 debug_unregister(kvm_s390_dbf);
194                 return -ENOMEM;
195         }
196
197         /* Register floating interrupt controller interface. */
198         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
199 }
200
201 void kvm_arch_exit(void)
202 {
203         debug_unregister(kvm_s390_dbf);
204 }
205
206 /* Section: device related */
207 long kvm_arch_dev_ioctl(struct file *filp,
208                         unsigned int ioctl, unsigned long arg)
209 {
210         if (ioctl == KVM_S390_ENABLE_SIE)
211                 return s390_enable_sie();
212         return -EINVAL;
213 }
214
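/*
 * Report which optional KVM capabilities are available on s390; for some
 * extensions the returned value is a limit (e.g. the maximum number of
 * VCPUs) rather than a plain boolean.
 */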
215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
216 {
217         int r;
218
219         switch (ext) {
220         case KVM_CAP_S390_PSW:
221         case KVM_CAP_S390_GMAP:
222         case KVM_CAP_SYNC_MMU:
223 #ifdef CONFIG_KVM_S390_UCONTROL
224         case KVM_CAP_S390_UCONTROL:
225 #endif
226         case KVM_CAP_ASYNC_PF:
227         case KVM_CAP_SYNC_REGS:
228         case KVM_CAP_ONE_REG:
229         case KVM_CAP_ENABLE_CAP:
230         case KVM_CAP_S390_CSS_SUPPORT:
231         case KVM_CAP_IOEVENTFD:
232         case KVM_CAP_DEVICE_CTRL:
233         case KVM_CAP_ENABLE_CAP_VM:
234         case KVM_CAP_S390_IRQCHIP:
235         case KVM_CAP_VM_ATTRIBUTES:
236         case KVM_CAP_MP_STATE:
237         case KVM_CAP_S390_INJECT_IRQ:
238         case KVM_CAP_S390_USER_SIGP:
239         case KVM_CAP_S390_USER_STSI:
240         case KVM_CAP_S390_SKEYS:
241         case KVM_CAP_S390_IRQ_STATE:
242                 r = 1;
243                 break;
244         case KVM_CAP_S390_MEM_OP:
245                 r = MEM_OP_MAX_SIZE;
246                 break;
247         case KVM_CAP_NR_VCPUS:
248         case KVM_CAP_MAX_VCPUS:
249                 r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
250                                   : KVM_S390_BSCA_CPU_SLOTS;
251                 break;
252         case KVM_CAP_NR_MEMSLOTS:
253                 r = KVM_USER_MEM_SLOTS;
254                 break;
255         case KVM_CAP_S390_COW:
256                 r = MACHINE_HAS_ESOP;
257                 break;
258         case KVM_CAP_S390_VECTOR_REGISTERS:
259                 r = MACHINE_HAS_VX;
260                 break;
261         default:
262                 r = 0;
263         }
264         return r;
265 }
266
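/*
 * Propagate dirty information from the host gmap tables into the dirty
 * bitmap of the given memslot, page by page.
 */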
267 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
268                                         struct kvm_memory_slot *memslot)
269 {
270         gfn_t cur_gfn, last_gfn;
271         unsigned long address;
272         struct gmap *gmap = kvm->arch.gmap;
273
274         down_read(&gmap->mm->mmap_sem);
275         /* Loop over all guest pages */
276         last_gfn = memslot->base_gfn + memslot->npages;
277         for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
278                 address = gfn_to_hva_memslot(memslot, cur_gfn);
279
280                 if (gmap_test_and_clear_dirty(address, gmap))
281                         mark_page_dirty(kvm, cur_gfn);
282         }
283         up_read(&gmap->mm->mmap_sem);
284 }
285
286 /* Section: vm related */
287 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
288
289 /*
290  * Get (and clear) the dirty memory log for a memory slot.
291  */
292 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
293                                struct kvm_dirty_log *log)
294 {
295         int r;
296         unsigned long n;
297         struct kvm_memslots *slots;
298         struct kvm_memory_slot *memslot;
299         int is_dirty = 0;
300
301         mutex_lock(&kvm->slots_lock);
302
303         r = -EINVAL;
304         if (log->slot >= KVM_USER_MEM_SLOTS)
305                 goto out;
306
307         slots = kvm_memslots(kvm);
308         memslot = id_to_memslot(slots, log->slot);
309         r = -ENOENT;
310         if (!memslot->dirty_bitmap)
311                 goto out;
312
313         kvm_s390_sync_dirty_log(kvm, memslot);
314         r = kvm_get_dirty_log(kvm, log, &is_dirty);
315         if (r)
316                 goto out;
317
318         /* Clear the dirty log */
319         if (is_dirty) {
320                 n = kvm_dirty_bitmap_bytes(memslot);
321                 memset(memslot->dirty_bitmap, 0, n);
322         }
323         r = 0;
324 out:
325         mutex_unlock(&kvm->slots_lock);
326         return r;
327 }
328
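/* Handle KVM_ENABLE_CAP for VM-wide capabilities (irqchip, user SIGP, ...). */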
329 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
330 {
331         int r;
332
333         if (cap->flags)
334                 return -EINVAL;
335
336         switch (cap->cap) {
337         case KVM_CAP_S390_IRQCHIP:
338                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
339                 kvm->arch.use_irqchip = 1;
340                 r = 0;
341                 break;
342         case KVM_CAP_S390_USER_SIGP:
343                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
344                 kvm->arch.user_sigp = 1;
345                 r = 0;
346                 break;
347         case KVM_CAP_S390_VECTOR_REGISTERS:
348                 mutex_lock(&kvm->lock);
349                 if (atomic_read(&kvm->online_vcpus)) {
350                         r = -EBUSY;
351                 } else if (MACHINE_HAS_VX) {
352                         set_kvm_facility(kvm->arch.model.fac->mask, 129);
353                         set_kvm_facility(kvm->arch.model.fac->list, 129);
354                         r = 0;
355                 } else
356                         r = -EINVAL;
357                 mutex_unlock(&kvm->lock);
358                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
359                          r ? "(not available)" : "(success)");
360                 break;
361         case KVM_CAP_S390_USER_STSI:
362                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
363                 kvm->arch.user_stsi = 1;
364                 r = 0;
365                 break;
366         default:
367                 r = -EINVAL;
368                 break;
369         }
370         return r;
371 }
372
373 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
374 {
375         int ret;
376
377         switch (attr->attr) {
378         case KVM_S390_VM_MEM_LIMIT_SIZE:
379                 ret = 0;
380                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
381                          kvm->arch.gmap->asce_end);
382                 if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
383                         ret = -EFAULT;
384                 break;
385         default:
386                 ret = -ENXIO;
387                 break;
388         }
389         return ret;
390 }
391
392 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
393 {
394         int ret;
395         unsigned int idx;
396         switch (attr->attr) {
397         case KVM_S390_VM_MEM_ENABLE_CMMA:
398                 /* enable CMMA only for z10 and later (EDAT_1) */
399                 ret = -EINVAL;
400                 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
401                         break;
402
403                 ret = -EBUSY;
404                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
405                 mutex_lock(&kvm->lock);
406                 if (atomic_read(&kvm->online_vcpus) == 0) {
407                         kvm->arch.use_cmma = 1;
408                         ret = 0;
409                 }
410                 mutex_unlock(&kvm->lock);
411                 break;
412         case KVM_S390_VM_MEM_CLR_CMMA:
413                 ret = -EINVAL;
414                 if (!kvm->arch.use_cmma)
415                         break;
416
417                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
418                 mutex_lock(&kvm->lock);
419                 idx = srcu_read_lock(&kvm->srcu);
420                 s390_reset_cmma(kvm->arch.gmap->mm);
421                 srcu_read_unlock(&kvm->srcu, idx);
422                 mutex_unlock(&kvm->lock);
423                 ret = 0;
424                 break;
425         case KVM_S390_VM_MEM_LIMIT_SIZE: {
426                 unsigned long new_limit;
427
428                 if (kvm_is_ucontrol(kvm))
429                         return -EINVAL;
430
431                 if (get_user(new_limit, (u64 __user *)attr->addr))
432                         return -EFAULT;
433
434                 if (new_limit > kvm->arch.gmap->asce_end)
435                         return -E2BIG;
436
437                 ret = -EBUSY;
438                 mutex_lock(&kvm->lock);
439                 if (atomic_read(&kvm->online_vcpus) == 0) {
440                         /* gmap_alloc will round the limit up */
441                         struct gmap *new = gmap_alloc(current->mm, new_limit);
442
443                         if (!new) {
444                                 ret = -ENOMEM;
445                         } else {
446                                 gmap_free(kvm->arch.gmap);
447                                 new->private = kvm;
448                                 kvm->arch.gmap = new;
449                                 ret = 0;
450                         }
451                 }
452                 mutex_unlock(&kvm->lock);
453                 VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
454                 break;
455         }
456         default:
457                 ret = -ENXIO;
458                 break;
459         }
460         return ret;
461 }
462
463 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
464
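/*
 * Enable or disable AES/DEA key wrapping for the whole VM. Fresh wrapping
 * key masks are generated on enable; afterwards every VCPU is kicked out
 * of SIE so that the new crypto control block contents take effect.
 */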
465 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
466 {
467         struct kvm_vcpu *vcpu;
468         int i;
469
470         if (!test_kvm_facility(kvm, 76))
471                 return -EINVAL;
472
473         mutex_lock(&kvm->lock);
474         switch (attr->attr) {
475         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
476                 get_random_bytes(
477                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
478                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
479                 kvm->arch.crypto.aes_kw = 1;
480                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
481                 break;
482         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
483                 get_random_bytes(
484                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
485                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
486                 kvm->arch.crypto.dea_kw = 1;
487                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
488                 break;
489         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
490                 kvm->arch.crypto.aes_kw = 0;
491                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
492                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
493                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
494                 break;
495         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
496                 kvm->arch.crypto.dea_kw = 0;
497                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
498                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
499                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
500                 break;
501         default:
502                 mutex_unlock(&kvm->lock);
503                 return -ENXIO;
504         }
505
506         kvm_for_each_vcpu(i, vcpu, kvm) {
507                 kvm_s390_vcpu_crypto_setup(vcpu);
508                 exit_sie(vcpu);
509         }
510         mutex_unlock(&kvm->lock);
511         return 0;
512 }
513
514 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
515 {
516         u8 gtod_high;
517
518         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
519                                            sizeof(gtod_high)))
520                 return -EFAULT;
521
522         if (gtod_high != 0)
523                 return -EINVAL;
524         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
525
526         return 0;
527 }
528
529 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
530 {
531         u64 gtod;
532
533         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
534                 return -EFAULT;
535
536         kvm_s390_set_tod_clock(kvm, gtod);
537         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
538         return 0;
539 }
540
541 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
542 {
543         int ret;
544
545         if (attr->flags)
546                 return -EINVAL;
547
548         switch (attr->attr) {
549         case KVM_S390_VM_TOD_HIGH:
550                 ret = kvm_s390_set_tod_high(kvm, attr);
551                 break;
552         case KVM_S390_VM_TOD_LOW:
553                 ret = kvm_s390_set_tod_low(kvm, attr);
554                 break;
555         default:
556                 ret = -ENXIO;
557                 break;
558         }
559         return ret;
560 }
561
562 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
563 {
564         u8 gtod_high = 0;
565
566         if (copy_to_user((void __user *)attr->addr, &gtod_high,
567                                          sizeof(gtod_high)))
568                 return -EFAULT;
569         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
570
571         return 0;
572 }
573
574 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
575 {
576         u64 gtod;
577
578         gtod = kvm_s390_get_tod_clock_fast(kvm);
579         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
580                 return -EFAULT;
581         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
582
583         return 0;
584 }
585
586 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
587 {
588         int ret;
589
590         if (attr->flags)
591                 return -EINVAL;
592
593         switch (attr->attr) {
594         case KVM_S390_VM_TOD_HIGH:
595                 ret = kvm_s390_get_tod_high(kvm, attr);
596                 break;
597         case KVM_S390_VM_TOD_LOW:
598                 ret = kvm_s390_get_tod_low(kvm, attr);
599                 break;
600         default:
601                 ret = -ENXIO;
602                 break;
603         }
604         return ret;
605 }
606
607 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
608 {
609         struct kvm_s390_vm_cpu_processor *proc;
610         int ret = 0;
611
612         mutex_lock(&kvm->lock);
613         if (atomic_read(&kvm->online_vcpus)) {
614                 ret = -EBUSY;
615                 goto out;
616         }
617         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
618         if (!proc) {
619                 ret = -ENOMEM;
620                 goto out;
621         }
622         if (!copy_from_user(proc, (void __user *)attr->addr,
623                             sizeof(*proc))) {
624                 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
625                        sizeof(struct cpuid));
626                 kvm->arch.model.ibc = proc->ibc;
627                 memcpy(kvm->arch.model.fac->list, proc->fac_list,
628                        S390_ARCH_FAC_LIST_SIZE_BYTE);
629         } else
630                 ret = -EFAULT;
631         kfree(proc);
632 out:
633         mutex_unlock(&kvm->lock);
634         return ret;
635 }
636
637 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
638 {
639         int ret = -ENXIO;
640
641         switch (attr->attr) {
642         case KVM_S390_VM_CPU_PROCESSOR:
643                 ret = kvm_s390_set_processor(kvm, attr);
644                 break;
645         }
646         return ret;
647 }
648
649 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
650 {
651         struct kvm_s390_vm_cpu_processor *proc;
652         int ret = 0;
653
654         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
655         if (!proc) {
656                 ret = -ENOMEM;
657                 goto out;
658         }
659         memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
660         proc->ibc = kvm->arch.model.ibc;
661         memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
662         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
663                 ret = -EFAULT;
664         kfree(proc);
665 out:
666         return ret;
667 }
668
669 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
670 {
671         struct kvm_s390_vm_cpu_machine *mach;
672         int ret = 0;
673
674         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
675         if (!mach) {
676                 ret = -ENOMEM;
677                 goto out;
678         }
679         get_cpu_id((struct cpuid *) &mach->cpuid);
680         mach->ibc = sclp.ibc;
681         memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
682                S390_ARCH_FAC_LIST_SIZE_BYTE);
683         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
684                S390_ARCH_FAC_LIST_SIZE_BYTE);
685         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
686                 ret = -EFAULT;
687         kfree(mach);
688 out:
689         return ret;
690 }
691
692 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
693 {
694         int ret = -ENXIO;
695
696         switch (attr->attr) {
697         case KVM_S390_VM_CPU_PROCESSOR:
698                 ret = kvm_s390_get_processor(kvm, attr);
699                 break;
700         case KVM_S390_VM_CPU_MACHINE:
701                 ret = kvm_s390_get_machine(kvm, attr);
702                 break;
703         }
704         return ret;
705 }
706
707 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
708 {
709         int ret;
710
711         switch (attr->group) {
712         case KVM_S390_VM_MEM_CTRL:
713                 ret = kvm_s390_set_mem_control(kvm, attr);
714                 break;
715         case KVM_S390_VM_TOD:
716                 ret = kvm_s390_set_tod(kvm, attr);
717                 break;
718         case KVM_S390_VM_CPU_MODEL:
719                 ret = kvm_s390_set_cpu_model(kvm, attr);
720                 break;
721         case KVM_S390_VM_CRYPTO:
722                 ret = kvm_s390_vm_set_crypto(kvm, attr);
723                 break;
724         default:
725                 ret = -ENXIO;
726                 break;
727         }
728
729         return ret;
730 }
731
732 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
733 {
734         int ret;
735
736         switch (attr->group) {
737         case KVM_S390_VM_MEM_CTRL:
738                 ret = kvm_s390_get_mem_control(kvm, attr);
739                 break;
740         case KVM_S390_VM_TOD:
741                 ret = kvm_s390_get_tod(kvm, attr);
742                 break;
743         case KVM_S390_VM_CPU_MODEL:
744                 ret = kvm_s390_get_cpu_model(kvm, attr);
745                 break;
746         default:
747                 ret = -ENXIO;
748                 break;
749         }
750
751         return ret;
752 }
753
754 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
755 {
756         int ret;
757
758         switch (attr->group) {
759         case KVM_S390_VM_MEM_CTRL:
760                 switch (attr->attr) {
761                 case KVM_S390_VM_MEM_ENABLE_CMMA:
762                 case KVM_S390_VM_MEM_CLR_CMMA:
763                 case KVM_S390_VM_MEM_LIMIT_SIZE:
764                         ret = 0;
765                         break;
766                 default:
767                         ret = -ENXIO;
768                         break;
769                 }
770                 break;
771         case KVM_S390_VM_TOD:
772                 switch (attr->attr) {
773                 case KVM_S390_VM_TOD_LOW:
774                 case KVM_S390_VM_TOD_HIGH:
775                         ret = 0;
776                         break;
777                 default:
778                         ret = -ENXIO;
779                         break;
780                 }
781                 break;
782         case KVM_S390_VM_CPU_MODEL:
783                 switch (attr->attr) {
784                 case KVM_S390_VM_CPU_PROCESSOR:
785                 case KVM_S390_VM_CPU_MACHINE:
786                         ret = 0;
787                         break;
788                 default:
789                         ret = -ENXIO;
790                         break;
791                 }
792                 break;
793         case KVM_S390_VM_CRYPTO:
794                 switch (attr->attr) {
795                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
796                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
797                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
798                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
799                         ret = 0;
800                         break;
801                 default:
802                         ret = -ENXIO;
803                         break;
804                 }
805                 break;
806         default:
807                 ret = -ENXIO;
808                 break;
809         }
810
811         return ret;
812 }
813
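/*
 * Read the storage keys of a range of guest frames into a user-supplied
 * buffer. Returns KVM_S390_GET_SKEYS_NONE if the guest does not use
 * storage keys at all.
 */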
814 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
815 {
816         uint8_t *keys;
817         uint64_t hva;
818         unsigned long curkey;
819         int i, r = 0;
820
821         if (args->flags != 0)
822                 return -EINVAL;
823
824         /* Is this guest using storage keys? */
825         if (!mm_use_skey(current->mm))
826                 return KVM_S390_GET_SKEYS_NONE;
827
828         /* Enforce sane limit on memory allocation */
829         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
830                 return -EINVAL;
831
832         keys = kmalloc_array(args->count, sizeof(uint8_t),
833                              GFP_KERNEL | __GFP_NOWARN);
834         if (!keys)
835                 keys = vmalloc(sizeof(uint8_t) * args->count);
836         if (!keys)
837                 return -ENOMEM;
838
839         for (i = 0; i < args->count; i++) {
840                 hva = gfn_to_hva(kvm, args->start_gfn + i);
841                 if (kvm_is_error_hva(hva)) {
842                         r = -EFAULT;
843                         goto out;
844                 }
845
846                 curkey = get_guest_storage_key(current->mm, hva);
847                 if (IS_ERR_VALUE(curkey)) {
848                         r = curkey;
849                         goto out;
850                 }
851                 keys[i] = curkey;
852         }
853
854         r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
855                          sizeof(uint8_t) * args->count);
856         if (r)
857                 r = -EFAULT;
858 out:
859         kvfree(keys);
860         return r;
861 }
862
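/*
 * Copy storage keys from a user-supplied buffer and apply them to a range
 * of guest frames, enabling storage key handling for the guest first.
 */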
863 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
864 {
865         uint8_t *keys;
866         uint64_t hva;
867         int i, r = 0;
868
869         if (args->flags != 0)
870                 return -EINVAL;
871
872         /* Enforce sane limit on memory allocation */
873         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
874                 return -EINVAL;
875
876         keys = kmalloc_array(args->count, sizeof(uint8_t),
877                              GFP_KERNEL | __GFP_NOWARN);
878         if (!keys)
879                 keys = vmalloc(sizeof(uint8_t) * args->count);
880         if (!keys)
881                 return -ENOMEM;
882
883         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
884                            sizeof(uint8_t) * args->count);
885         if (r) {
886                 r = -EFAULT;
887                 goto out;
888         }
889
890         /* Enable storage key handling for the guest */
891         r = s390_enable_skey();
892         if (r)
893                 goto out;
894
895         for (i = 0; i < args->count; i++) {
896                 hva = gfn_to_hva(kvm, args->start_gfn + i);
897                 if (kvm_is_error_hva(hva)) {
898                         r = -EFAULT;
899                         goto out;
900                 }
901
902                 /* Lowest order bit is reserved */
903                 if (keys[i] & 0x01) {
904                         r = -EINVAL;
905                         goto out;
906                 }
907
908                 r = set_guest_storage_key(current->mm, hva,
909                                           (unsigned long)keys[i], 0);
910                 if (r)
911                         goto out;
912         }
913 out:
914         kvfree(keys);
915         return r;
916 }
917
918 long kvm_arch_vm_ioctl(struct file *filp,
919                        unsigned int ioctl, unsigned long arg)
920 {
921         struct kvm *kvm = filp->private_data;
922         void __user *argp = (void __user *)arg;
923         struct kvm_device_attr attr;
924         int r;
925
926         switch (ioctl) {
927         case KVM_S390_INTERRUPT: {
928                 struct kvm_s390_interrupt s390int;
929
930                 r = -EFAULT;
931                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
932                         break;
933                 r = kvm_s390_inject_vm(kvm, &s390int);
934                 break;
935         }
936         case KVM_ENABLE_CAP: {
937                 struct kvm_enable_cap cap;
938                 r = -EFAULT;
939                 if (copy_from_user(&cap, argp, sizeof(cap)))
940                         break;
941                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
942                 break;
943         }
944         case KVM_CREATE_IRQCHIP: {
945                 struct kvm_irq_routing_entry routing;
946
947                 r = -EINVAL;
948                 if (kvm->arch.use_irqchip) {
949                         /* Set up dummy routing. */
950                         memset(&routing, 0, sizeof(routing));
951                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
952                 }
953                 break;
954         }
955         case KVM_SET_DEVICE_ATTR: {
956                 r = -EFAULT;
957                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
958                         break;
959                 r = kvm_s390_vm_set_attr(kvm, &attr);
960                 break;
961         }
962         case KVM_GET_DEVICE_ATTR: {
963                 r = -EFAULT;
964                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
965                         break;
966                 r = kvm_s390_vm_get_attr(kvm, &attr);
967                 break;
968         }
969         case KVM_HAS_DEVICE_ATTR: {
970                 r = -EFAULT;
971                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
972                         break;
973                 r = kvm_s390_vm_has_attr(kvm, &attr);
974                 break;
975         }
976         case KVM_S390_GET_SKEYS: {
977                 struct kvm_s390_skeys args;
978
979                 r = -EFAULT;
980                 if (copy_from_user(&args, argp,
981                                    sizeof(struct kvm_s390_skeys)))
982                         break;
983                 r = kvm_s390_get_skeys(kvm, &args);
984                 break;
985         }
986         case KVM_S390_SET_SKEYS: {
987                 struct kvm_s390_skeys args;
988
989                 r = -EFAULT;
990                 if (copy_from_user(&args, argp,
991                                    sizeof(struct kvm_s390_skeys)))
992                         break;
993                 r = kvm_s390_set_skeys(kvm, &args);
994                 break;
995         }
996         default:
997                 r = -ENOTTY;
998         }
999
1000         return r;
1001 }
1002
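/* Query the AP configuration via PQAP(QCI); returns the condition code. */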
1003 static int kvm_s390_query_ap_config(u8 *config)
1004 {
1005         u32 fcn_code = 0x04000000UL;
1006         u32 cc = 0;
1007
1008         memset(config, 0, 128);
1009         asm volatile(
1010                 "lgr 0,%1\n"
1011                 "lgr 2,%2\n"
1012                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1013                 "0: ipm %0\n"
1014                 "srl %0,28\n"
1015                 "1:\n"
1016                 EX_TABLE(0b, 1b)
1017                 : "+r" (cc)
1018                 : "r" (fcn_code), "r" (config)
1019                 : "cc", "0", "2", "memory"
1020         );
1021
1022         return cc;
1023 }
1024
1025 static int kvm_s390_apxa_installed(void)
1026 {
1027         u8 config[128];
1028         int cc;
1029
1030         if (test_facility(2) && test_facility(12)) {
1031                 cc = kvm_s390_query_ap_config(config);
1032
1033                 if (cc)
1034                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1035                 else
1036                         return config[0] & 0x40;
1037         }
1038
1039         return 0;
1040 }
1041
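/* Select the CRYCB format depending on whether APXA is installed. */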
1042 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1043 {
1044         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1045
1046         if (kvm_s390_apxa_installed())
1047                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1048         else
1049                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1050 }
1051
1052 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1053 {
1054         get_cpu_id(cpu_id);
1055         cpu_id->version = 0xff;
1056 }
1057
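/*
 * Allocate the crypto control block and enable AES/DEA key wrapping with
 * freshly generated wrapping key masks by default.
 */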
1058 static int kvm_s390_crypto_init(struct kvm *kvm)
1059 {
1060         if (!test_kvm_facility(kvm, 76))
1061                 return 0;
1062
1063         kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1064                                          GFP_KERNEL | GFP_DMA);
1065         if (!kvm->arch.crypto.crycb)
1066                 return -ENOMEM;
1067
1068         kvm_s390_set_crycb_format(kvm);
1069
1070         /* Enable AES/DEA protected key functions by default */
1071         kvm->arch.crypto.aes_kw = 1;
1072         kvm->arch.crypto.dea_kw = 1;
1073         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1074                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1075         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1076                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1077
1078         return 0;
1079 }
1080
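/* Free the SCA, honoring whether the basic or the extended format is in use. */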
1081 static void sca_dispose(struct kvm *kvm)
1082 {
1083         if (kvm->arch.use_esca)
1084                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1085         else
1086                 free_page((unsigned long)(kvm->arch.sca));
1087         kvm->arch.sca = NULL;
1088 }
1089
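/*
 * Set up the architecture specific parts of a new VM: SCA, debug feature,
 * CPU model and facilities, crypto control block, floating interrupt state
 * and (for non-ucontrol guests) the guest address space (gmap).
 */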
1090 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1091 {
1092         int i, rc;
1093         char debug_name[16];
1094         static unsigned long sca_offset;
1095
1096         rc = -EINVAL;
1097 #ifdef CONFIG_KVM_S390_UCONTROL
1098         if (type & ~KVM_VM_S390_UCONTROL)
1099                 goto out_err;
1100         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1101                 goto out_err;
1102 #else
1103         if (type)
1104                 goto out_err;
1105 #endif
1106
1107         rc = s390_enable_sie();
1108         if (rc)
1109                 goto out_err;
1110
1111         rc = -ENOMEM;
1112
1113         kvm->arch.use_esca = 0; /* start with basic SCA */
1114         rwlock_init(&kvm->arch.sca_lock);
1115         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
1116         if (!kvm->arch.sca)
1117                 goto out_err;
1118         spin_lock(&kvm_lock);
1119         sca_offset += 16;
1120         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1121                 sca_offset = 0;
1122         kvm->arch.sca = (struct bsca_block *)
1123                         ((char *) kvm->arch.sca + sca_offset);
1124         spin_unlock(&kvm_lock);
1125
1126         sprintf(debug_name, "kvm-%u", current->pid);
1127
1128         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1129         if (!kvm->arch.dbf)
1130                 goto out_err;
1131
1132         /*
1133          * The architectural maximum number of facility bits is 16 kbit. To
1134          * store this amount, 2 kbyte of memory is required. Thus we need a
1135          * full page to hold the guest facility list (arch.model.fac->list)
1136          * and the facility mask (arch.model.fac->mask). The address must
1137          * fit within 31 bits and be word aligned.
1138          */
1139         kvm->arch.model.fac =
1140                 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1141         if (!kvm->arch.model.fac)
1142                 goto out_err;
1143
1144         /* Populate the facility mask initially. */
1145         memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1146                S390_ARCH_FAC_LIST_SIZE_BYTE);
1147         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1148                 if (i < kvm_s390_fac_list_mask_size())
1149                         kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1150                 else
1151                         kvm->arch.model.fac->mask[i] = 0UL;
1152         }
1153
1154         /* Populate the facility list initially. */
1155         memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1156                S390_ARCH_FAC_LIST_SIZE_BYTE);
1157
1158         kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1159         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1160
1161         if (kvm_s390_crypto_init(kvm) < 0)
1162                 goto out_err;
1163
1164         spin_lock_init(&kvm->arch.float_int.lock);
1165         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1166                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1167         init_waitqueue_head(&kvm->arch.ipte_wq);
1168         mutex_init(&kvm->arch.ipte_mutex);
1169
1170         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1171         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1172
1173         if (type & KVM_VM_S390_UCONTROL) {
1174                 kvm->arch.gmap = NULL;
1175         } else {
1176                 kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1177                 if (!kvm->arch.gmap)
1178                         goto out_err;
1179                 kvm->arch.gmap->private = kvm;
1180                 kvm->arch.gmap->pfault_enabled = 0;
1181         }
1182
1183         kvm->arch.css_support = 0;
1184         kvm->arch.use_irqchip = 0;
1185         kvm->arch.epoch = 0;
1186
1187         spin_lock_init(&kvm->arch.start_stop_lock);
1188         KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
1189
1190         return 0;
1191 out_err:
1192         kfree(kvm->arch.crypto.crycb);
1193         free_page((unsigned long)kvm->arch.model.fac);
1194         debug_unregister(kvm->arch.dbf);
1195         sca_dispose(kvm);
1196         KVM_EVENT(3, "creation of vm failed: %d", rc);
1197         return rc;
1198 }
1199
1200 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1201 {
1202         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1203         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1204         kvm_s390_clear_local_irqs(vcpu);
1205         kvm_clear_async_pf_completion_queue(vcpu);
1206         if (!kvm_is_ucontrol(vcpu->kvm))
1207                 sca_del_vcpu(vcpu);
1208         smp_mb();
1209
1210         if (kvm_is_ucontrol(vcpu->kvm))
1211                 gmap_free(vcpu->arch.gmap);
1212
1213         if (vcpu->kvm->arch.use_cmma)
1214                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1215         free_page((unsigned long)(vcpu->arch.sie_block));
1216
1217         kvm_vcpu_uninit(vcpu);
1218         kmem_cache_free(kvm_vcpu_cache, vcpu);
1219 }
1220
1221 static void kvm_free_vcpus(struct kvm *kvm)
1222 {
1223         unsigned int i;
1224         struct kvm_vcpu *vcpu;
1225
1226         kvm_for_each_vcpu(i, vcpu, kvm)
1227                 kvm_arch_vcpu_destroy(vcpu);
1228
1229         mutex_lock(&kvm->lock);
1230         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1231                 kvm->vcpus[i] = NULL;
1232
1233         atomic_set(&kvm->online_vcpus, 0);
1234         mutex_unlock(&kvm->lock);
1235 }
1236
1237 void kvm_arch_destroy_vm(struct kvm *kvm)
1238 {
1239         kvm_free_vcpus(kvm);
1240         free_page((unsigned long)kvm->arch.model.fac);
1241         sca_dispose(kvm);
1242         debug_unregister(kvm->arch.dbf);
1243         kfree(kvm->arch.crypto.crycb);
1244         if (!kvm_is_ucontrol(kvm))
1245                 gmap_free(kvm->arch.gmap);
1246         kvm_s390_destroy_adapters(kvm);
1247         kvm_s390_clear_float_irqs(kvm);
1248         KVM_EVENT(3, "vm 0x%p destroyed", kvm);
1249 }
1250
1251 /* Section: vcpu related */
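/* User controlled VMs get a private gmap per VCPU. */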
1252 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1253 {
1254         vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1255         if (!vcpu->arch.gmap)
1256                 return -ENOMEM;
1257         vcpu->arch.gmap->private = vcpu->kvm;
1258
1259         return 0;
1260 }
1261
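/* Remove a VCPU from the basic or extended SCA. */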
1262 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1263 {
1264         read_lock(&vcpu->kvm->arch.sca_lock);
1265         if (vcpu->kvm->arch.use_esca) {
1266                 struct esca_block *sca = vcpu->kvm->arch.sca;
1267
1268                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1269                 sca->cpu[vcpu->vcpu_id].sda = 0;
1270         } else {
1271                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1272
1273                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1274                 sca->cpu[vcpu->vcpu_id].sda = 0;
1275         }
1276         read_unlock(&vcpu->kvm->arch.sca_lock);
1277 }
1278
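/* Enter a VCPU into the SCA and point its SIE control block at the SCA origin. */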
1279 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1280 {
1281         read_lock(&vcpu->kvm->arch.sca_lock);
1282         if (vcpu->kvm->arch.use_esca) {
1283                 struct esca_block *sca = vcpu->kvm->arch.sca;
1284
1285                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1286                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1287                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1288                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1289                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1290         } else {
1291                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1292
1293                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1294                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1295                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1296                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1297         }
1298         read_unlock(&vcpu->kvm->arch.sca_lock);
1299 }
1300
1301 /* Basic SCA to Extended SCA data copy routines */
1302 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1303 {
1304         d->sda = s->sda;
1305         d->sigp_ctrl.c = s->sigp_ctrl.c;
1306         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1307 }
1308
1309 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1310 {
1311         int i;
1312
1313         d->ipte_control = s->ipte_control;
1314         d->mcn[0] = s->mcn;
1315         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1316                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1317 }
1318
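/*
 * Replace the basic SCA by an extended SCA: all VCPUs are blocked while the
 * entries are copied and every SIE control block is redirected to the new SCA.
 */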
1319 static int sca_switch_to_extended(struct kvm *kvm)
1320 {
1321         struct bsca_block *old_sca = kvm->arch.sca;
1322         struct esca_block *new_sca;
1323         struct kvm_vcpu *vcpu;
1324         unsigned int vcpu_idx;
1325         u32 scaol, scaoh;
1326
1327         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1328         if (!new_sca)
1329                 return -ENOMEM;
1330
1331         scaoh = (u32)((u64)(new_sca) >> 32);
1332         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1333
1334         kvm_s390_vcpu_block_all(kvm);
1335         write_lock(&kvm->arch.sca_lock);
1336
1337         sca_copy_b_to_e(new_sca, old_sca);
1338
1339         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1340                 vcpu->arch.sie_block->scaoh = scaoh;
1341                 vcpu->arch.sie_block->scaol = scaol;
1342                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1343         }
1344         kvm->arch.sca = new_sca;
1345         kvm->arch.use_esca = 1;
1346
1347         write_unlock(&kvm->arch.sca_lock);
1348         kvm_s390_vcpu_unblock_all(kvm);
1349
1350         free_page((unsigned long)old_sca);
1351
1352         VM_EVENT(kvm, 2, "Switched to ESCA (%p -> %p)", old_sca, kvm->arch.sca);
1353         return 0;
1354 }
1355
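/*
 * Check whether a VCPU id fits into the current SCA; switch to the extended
 * SCA if that is necessary and supported.
 */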
1356 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1357 {
1358         int rc;
1359
1360         if (id < KVM_S390_BSCA_CPU_SLOTS)
1361                 return true;
1362         if (!sclp.has_esca)
1363                 return false;
1364
1365         mutex_lock(&kvm->lock);
1366         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1367         mutex_unlock(&kvm->lock);
1368
1369         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1370 }
1371
1372 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1373 {
1374         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1375         kvm_clear_async_pf_completion_queue(vcpu);
1376         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1377                                     KVM_SYNC_GPRS |
1378                                     KVM_SYNC_ACRS |
1379                                     KVM_SYNC_CRS |
1380                                     KVM_SYNC_ARCH0 |
1381                                     KVM_SYNC_PFAULT;
1382         if (test_kvm_facility(vcpu->kvm, 129))
1383                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1384
1385         if (kvm_is_ucontrol(vcpu->kvm))
1386                 return __kvm_ucontrol_vcpu_init(vcpu);
1387
1388         return 0;
1389 }
1390
1391 /*
1392  * Backs up the current FP/VX register save area to a particular
1393  * destination.  Used to switch between different register save
1394  * areas.
1395  */
1396 static inline void save_fpu_to(struct fpu *dst)
1397 {
1398         dst->fpc = current->thread.fpu.fpc;
1399         dst->regs = current->thread.fpu.regs;
1400 }
1401
1402 /*
1403  * Switches the FP/VX register save area from which to lazily
1404  * restore register contents.
1405  */
1406 static inline void load_fpu_from(struct fpu *from)
1407 {
1408         current->thread.fpu.fpc = from->fpc;
1409         current->thread.fpu.regs = from->regs;
1410 }
1411
1412 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1413 {
1414         /* Save host register state */
1415         save_fpu_regs();
1416         save_fpu_to(&vcpu->arch.host_fpregs);
1417
1418         if (test_kvm_facility(vcpu->kvm, 129)) {
1419                 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1420                 /*
1421                  * Use the register save area in the SIE-control block
1422                  * for register restore and save in kvm_arch_vcpu_put()
1423                  */
1424                 current->thread.fpu.vxrs =
1425                         (__vector128 *)&vcpu->run->s.regs.vrs;
1426         } else
1427                 load_fpu_from(&vcpu->arch.guest_fpregs);
1428
1429         if (test_fp_ctl(current->thread.fpu.fpc))
1430                 /* User space provided an invalid FPC, let's clear it */
1431                 current->thread.fpu.fpc = 0;
1432
1433         save_access_regs(vcpu->arch.host_acrs);
1434         restore_access_regs(vcpu->run->s.regs.acrs);
1435         gmap_enable(vcpu->arch.gmap);
1436         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1437 }
1438
1439 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1440 {
1441         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1442         gmap_disable(vcpu->arch.gmap);
1443
1444         save_fpu_regs();
1445
1446         if (test_kvm_facility(vcpu->kvm, 129))
1447                 /*
1448                  * kvm_arch_vcpu_load() set up the register save area to
1449                  * the &vcpu->run->s.regs.vrs and, thus, the vector registers
1450                  * are already saved.  Only the floating-point control must be
1451                  * copied.
1452                  */
1453                 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1454         else
1455                 save_fpu_to(&vcpu->arch.guest_fpregs);
1456         load_fpu_from(&vcpu->arch.host_fpregs);
1457
1458         save_access_regs(vcpu->run->s.regs.acrs);
1459         restore_access_regs(vcpu->arch.host_acrs);
1460 }
1461
1462 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1463 {
1464         /* this equals initial cpu reset in the POP, but we don't switch to ESA */
1465         vcpu->arch.sie_block->gpsw.mask = 0UL;
1466         vcpu->arch.sie_block->gpsw.addr = 0UL;
1467         kvm_s390_set_prefix(vcpu, 0);
1468         vcpu->arch.sie_block->cputm     = 0UL;
1469         vcpu->arch.sie_block->ckc       = 0UL;
1470         vcpu->arch.sie_block->todpr     = 0;
1471         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1472         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1473         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1474         vcpu->arch.guest_fpregs.fpc = 0;
1475         asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1476         vcpu->arch.sie_block->gbea = 1;
1477         vcpu->arch.sie_block->pp = 0;
1478         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1479         kvm_clear_async_pf_completion_queue(vcpu);
1480         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1481                 kvm_s390_vcpu_stop(vcpu);
1482         kvm_s390_clear_local_irqs(vcpu);
1483 }
1484
1485 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1486 {
1487         mutex_lock(&vcpu->kvm->lock);
1488         preempt_disable();
1489         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1490         preempt_enable();
1491         mutex_unlock(&vcpu->kvm->lock);
1492         if (!kvm_is_ucontrol(vcpu->kvm)) {
1493                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1494                 sca_add_vcpu(vcpu);
1495         }
1496
1497 }
1498
1499 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1500 {
1501         if (!test_kvm_facility(vcpu->kvm, 76))
1502                 return;
1503
1504         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1505
1506         if (vcpu->kvm->arch.crypto.aes_kw)
1507                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1508         if (vcpu->kvm->arch.crypto.dea_kw)
1509                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1510
1511         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1512 }
1513
1514 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1515 {
1516         free_page(vcpu->arch.sie_block->cbrlo);
1517         vcpu->arch.sie_block->cbrlo = 0;
1518 }
1519
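/* Allocate the page backing the SIE CBRLO field, needed when CMMA is in use. */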
1520 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1521 {
1522         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1523         if (!vcpu->arch.sie_block->cbrlo)
1524                 return -ENOMEM;
1525
1526         vcpu->arch.sie_block->ecb2 |= 0x80;
1527         vcpu->arch.sie_block->ecb2 &= ~0x08;
1528         return 0;
1529 }
1530
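/* Propagate the VM-wide CPU model (cpuid, ibc, facility list) to the VCPU. */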
1531 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1532 {
1533         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1534
1535         vcpu->arch.cpu_id = model->cpu_id;
1536         vcpu->arch.sie_block->ibc = model->ibc;
1537         vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1538 }
1539
1540 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1541 {
1542         int rc = 0;
1543
1544         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1545                                                     CPUSTAT_SM |
1546                                                     CPUSTAT_STOPPED);
1547
1548         if (test_kvm_facility(vcpu->kvm, 78))
1549                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1550         else if (test_kvm_facility(vcpu->kvm, 8))
1551                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1552
1553         kvm_s390_vcpu_setup_model(vcpu);
1554
1555         vcpu->arch.sie_block->ecb   = 6;
1556         if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1557                 vcpu->arch.sie_block->ecb |= 0x10;
1558
1559         vcpu->arch.sie_block->ecb2  = 8;
1560         vcpu->arch.sie_block->eca   = 0xC1002000U;
1561         if (sclp.has_siif)
1562                 vcpu->arch.sie_block->eca |= 1;
1563         if (sclp.has_sigpif)
1564                 vcpu->arch.sie_block->eca |= 0x10000000U;
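        /* facility 129 is the vector facility; the eca/ecd bits set below
         * presumably allow the guest to use the vector registers under SIE */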
1565         if (test_kvm_facility(vcpu->kvm, 129)) {
1566                 vcpu->arch.sie_block->eca |= 0x00020000;
1567                 vcpu->arch.sie_block->ecd |= 0x20000000;
1568         }
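        /*
         * Intercept the storage-key instructions (ISKE/SSKE/RRBE).  Our
         * reading: guest storage keys are handled lazily, and these
         * intercepts are dropped again once the guest actually uses keys.
         */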
1569         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1570
1571         if (vcpu->kvm->arch.use_cmma) {
1572                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1573                 if (rc)
1574                         return rc;
1575         }
1576         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1577         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1578
1579         kvm_s390_vcpu_crypto_setup(vcpu);
1580
1581         return rc;
1582 }
1583
1584 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1585                                       unsigned int id)
1586 {
1587         struct kvm_vcpu *vcpu;
1588         struct sie_page *sie_page;
1589         int rc = -EINVAL;
1590
1591         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1592                 goto out;
1593
1594         rc = -ENOMEM;
1595
1596         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1597         if (!vcpu)
1598                 goto out;
1599
1600         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1601         if (!sie_page)
1602                 goto out_free_cpu;
1603
1604         vcpu->arch.sie_block = &sie_page->sie_block;
1605         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1606
1607         vcpu->arch.sie_block->icpua = id;
1608         spin_lock_init(&vcpu->arch.local_int.lock);
1609         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1610         vcpu->arch.local_int.wq = &vcpu->wq;
1611         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1612
1613         /*
1614          * Allocate a save area for floating-point registers.  If the vector
1615          * extension is available, register contents are saved in the SIE
1616          * control block.  The allocated save area is still required in
1617          * particular places, for example, in kvm_s390_vcpu_store_status().
1618          */
1619         vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
1620                                                GFP_KERNEL);
1621         if (!vcpu->arch.guest_fpregs.fprs) {
1622                 rc = -ENOMEM;
1623                 goto out_free_sie_block;
1624         }
1625
1626         rc = kvm_vcpu_init(vcpu, kvm, id);
1627         if (rc)
1628                 goto out_free_sie_block;
1629         VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1630                  vcpu->arch.sie_block);
1631         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1632
1633         return vcpu;
1634 out_free_sie_block:
1635         free_page((unsigned long)(vcpu->arch.sie_block));
1636 out_free_cpu:
1637         kmem_cache_free(kvm_vcpu_cache, vcpu);
1638 out:
1639         return ERR_PTR(rc);
1640 }
1641
1642 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1643 {
1644         return kvm_s390_vcpu_has_irq(vcpu, 0);
1645 }
1646
1647 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1648 {
1649         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1650         exit_sie(vcpu);
1651 }
1652
1653 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1654 {
1655         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1656 }
1657
1658 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1659 {
1660         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1661         exit_sie(vcpu);
1662 }
1663
1664 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1665 {
1666         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1667 }
1668
1669 /*
1670  * Kick a guest cpu out of SIE and wait until SIE is not running.
1671  * If the CPU is not running (e.g. waiting as idle) the function will
1672  * return immediately. */
1673 void exit_sie(struct kvm_vcpu *vcpu)
1674 {
1675         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1676         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1677                 cpu_relax();
1678 }
1679
1680 /* Kick a guest cpu out of SIE to process a request synchronously */
1681 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1682 {
1683         kvm_make_request(req, vcpu);
1684         kvm_s390_vcpu_request(vcpu);
1685 }
1686
1687 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1688 {
1689         int i;
1690         struct kvm *kvm = gmap->private;
1691         struct kvm_vcpu *vcpu;
1692
1693         kvm_for_each_vcpu(i, vcpu, kvm) {
1694                 /* match against both prefix pages */
1695                 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1696                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1697                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1698                 }
1699         }
1700 }
1701
1702 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1703 {
1704         /* kvm common code refers to this, but never calls it */
1705         BUG();
1706         return 0;
1707 }
1708
1709 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1710                                            struct kvm_one_reg *reg)
1711 {
1712         int r = -EINVAL;
1713
1714         switch (reg->id) {
1715         case KVM_REG_S390_TODPR:
1716                 r = put_user(vcpu->arch.sie_block->todpr,
1717                              (u32 __user *)reg->addr);
1718                 break;
1719         case KVM_REG_S390_EPOCHDIFF:
1720                 r = put_user(vcpu->arch.sie_block->epoch,
1721                              (u64 __user *)reg->addr);
1722                 break;
1723         case KVM_REG_S390_CPU_TIMER:
1724                 r = put_user(vcpu->arch.sie_block->cputm,
1725                              (u64 __user *)reg->addr);
1726                 break;
1727         case KVM_REG_S390_CLOCK_COMP:
1728                 r = put_user(vcpu->arch.sie_block->ckc,
1729                              (u64 __user *)reg->addr);
1730                 break;
1731         case KVM_REG_S390_PFTOKEN:
1732                 r = put_user(vcpu->arch.pfault_token,
1733                              (u64 __user *)reg->addr);
1734                 break;
1735         case KVM_REG_S390_PFCOMPARE:
1736                 r = put_user(vcpu->arch.pfault_compare,
1737                              (u64 __user *)reg->addr);
1738                 break;
1739         case KVM_REG_S390_PFSELECT:
1740                 r = put_user(vcpu->arch.pfault_select,
1741                              (u64 __user *)reg->addr);
1742                 break;
1743         case KVM_REG_S390_PP:
1744                 r = put_user(vcpu->arch.sie_block->pp,
1745                              (u64 __user *)reg->addr);
1746                 break;
1747         case KVM_REG_S390_GBEA:
1748                 r = put_user(vcpu->arch.sie_block->gbea,
1749                              (u64 __user *)reg->addr);
1750                 break;
1751         default:
1752                 break;
1753         }
1754
1755         return r;
1756 }
1757
1758 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1759                                            struct kvm_one_reg *reg)
1760 {
1761         int r = -EINVAL;
1762
1763         switch (reg->id) {
1764         case KVM_REG_S390_TODPR:
1765                 r = get_user(vcpu->arch.sie_block->todpr,
1766                              (u32 __user *)reg->addr);
1767                 break;
1768         case KVM_REG_S390_EPOCHDIFF:
1769                 r = get_user(vcpu->arch.sie_block->epoch,
1770                              (u64 __user *)reg->addr);
1771                 break;
1772         case KVM_REG_S390_CPU_TIMER:
1773                 r = get_user(vcpu->arch.sie_block->cputm,
1774                              (u64 __user *)reg->addr);
1775                 break;
1776         case KVM_REG_S390_CLOCK_COMP:
1777                 r = get_user(vcpu->arch.sie_block->ckc,
1778                              (u64 __user *)reg->addr);
1779                 break;
1780         case KVM_REG_S390_PFTOKEN:
1781                 r = get_user(vcpu->arch.pfault_token,
1782                              (u64 __user *)reg->addr);
1783                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1784                         kvm_clear_async_pf_completion_queue(vcpu);
1785                 break;
1786         case KVM_REG_S390_PFCOMPARE:
1787                 r = get_user(vcpu->arch.pfault_compare,
1788                              (u64 __user *)reg->addr);
1789                 break;
1790         case KVM_REG_S390_PFSELECT:
1791                 r = get_user(vcpu->arch.pfault_select,
1792                              (u64 __user *)reg->addr);
1793                 break;
1794         case KVM_REG_S390_PP:
1795                 r = get_user(vcpu->arch.sie_block->pp,
1796                              (u64 __user *)reg->addr);
1797                 break;
1798         case KVM_REG_S390_GBEA:
1799                 r = get_user(vcpu->arch.sie_block->gbea,
1800                              (u64 __user *)reg->addr);
1801                 break;
1802         default:
1803                 break;
1804         }
1805
1806         return r;
1807 }
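/*
 * Illustrative sketch, not part of this file: userspace reaches the two
 * handlers above through the generic ONE_REG interface, e.g. to set the
 * CPU timer:
 *
 *      __u64 cputm = 0;
 *      struct kvm_one_reg reg = {
 *              .id   = KVM_REG_S390_CPU_TIMER,
 *              .addr = (__u64)(unsigned long)&cputm,
 *      };
 *      ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
 *
 * vcpu_fd stands in for the caller's vcpu file descriptor.
 */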
1808
1809 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1810 {
1811         kvm_s390_vcpu_initial_reset(vcpu);
1812         return 0;
1813 }
1814
1815 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1816 {
1817         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1818         return 0;
1819 }
1820
1821 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1822 {
1823         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1824         return 0;
1825 }
1826
1827 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1828                                   struct kvm_sregs *sregs)
1829 {
1830         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1831         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1832         restore_access_regs(vcpu->run->s.regs.acrs);
1833         return 0;
1834 }
1835
1836 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1837                                   struct kvm_sregs *sregs)
1838 {
1839         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1840         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1841         return 0;
1842 }
1843
1844 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1845 {
1846         if (test_fp_ctl(fpu->fpc))
1847                 return -EINVAL;
1848         memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1849         vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1850         save_fpu_regs();
1851         load_fpu_from(&vcpu->arch.guest_fpregs);
1852         return 0;
1853 }
1854
1855 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1856 {
1857         memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1858         fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1859         return 0;
1860 }
1861
1862 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1863 {
1864         int rc = 0;
1865
1866         if (!is_vcpu_stopped(vcpu))
1867                 rc = -EBUSY;
1868         else {
1869                 vcpu->run->psw_mask = psw.mask;
1870                 vcpu->run->psw_addr = psw.addr;
1871         }
1872         return rc;
1873 }
1874
1875 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1876                                   struct kvm_translation *tr)
1877 {
1878         return -EINVAL; /* not implemented yet */
1879 }
1880
1881 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1882                               KVM_GUESTDBG_USE_HW_BP | \
1883                               KVM_GUESTDBG_ENABLE)
1884
1885 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1886                                         struct kvm_guest_debug *dbg)
1887 {
1888         int rc = 0;
1889
1890         vcpu->guest_debug = 0;
1891         kvm_s390_clear_bp_data(vcpu);
1892
1893         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1894                 return -EINVAL;
1895
1896         if (dbg->control & KVM_GUESTDBG_ENABLE) {
1897                 vcpu->guest_debug = dbg->control;
1898                 /* enforce guest PER */
1899                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1900
1901                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1902                         rc = kvm_s390_import_bp_data(vcpu, dbg);
1903         } else {
1904                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1905                 vcpu->arch.guestdbg.last_bp = 0;
1906         }
1907
1908         if (rc) {
1909                 vcpu->guest_debug = 0;
1910                 kvm_s390_clear_bp_data(vcpu);
1911                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1912         }
1913
1914         return rc;
1915 }
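/*
 * Illustrative sketch, not part of this file: userspace enables
 * single-stepping by passing the control flags checked above, e.g.:
 *
 *      struct kvm_guest_debug dbg = {
 *              .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *      };
 *      ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 *
 * vcpu_fd stands in for the caller's vcpu file descriptor.
 */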
1916
1917 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1918                                     struct kvm_mp_state *mp_state)
1919 {
1920         /* CHECK_STOP and LOAD are not supported yet */
1921         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1922                                        KVM_MP_STATE_OPERATING;
1923 }
1924
1925 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1926                                     struct kvm_mp_state *mp_state)
1927 {
1928         int rc = 0;
1929
1930         /* user space knows about this interface - let it control the state */
1931         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1932
1933         switch (mp_state->mp_state) {
1934         case KVM_MP_STATE_STOPPED:
1935                 kvm_s390_vcpu_stop(vcpu);
1936                 break;
1937         case KVM_MP_STATE_OPERATING:
1938                 kvm_s390_vcpu_start(vcpu);
1939                 break;
1940         case KVM_MP_STATE_LOAD:
1941         case KVM_MP_STATE_CHECK_STOP:
1942                 /* fall through - CHECK_STOP and LOAD are not supported yet */
1943         default:
1944                 rc = -ENXIO;
1945         }
1946
1947         return rc;
1948 }
1949
1950 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1951 {
1952         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1953 }
1954
1955 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1956 {
1957 retry:
1958         kvm_s390_vcpu_request_handled(vcpu);
1959         if (!vcpu->requests)
1960                 return 0;
1961         /*
1962          * We use MMU_RELOAD just to re-arm the ipte notifier for the
1963          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1964          * This ensures that the ipte instruction for this request has
1965          * already finished. We might race against a second unmapper that
1966          * wants to set the blocking bit. Let's just retry the request loop.
1967          */
1968         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1969                 int rc;
1970                 rc = gmap_ipte_notify(vcpu->arch.gmap,
1971                                       kvm_s390_get_prefix(vcpu),
1972                                       PAGE_SIZE * 2);
1973                 if (rc)
1974                         return rc;
1975                 goto retry;
1976         }
1977
1978         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1979                 vcpu->arch.sie_block->ihcpu = 0xffff;
1980                 goto retry;
1981         }
1982
1983         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1984                 if (!ibs_enabled(vcpu)) {
1985                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1986                         atomic_or(CPUSTAT_IBS,
1987                                         &vcpu->arch.sie_block->cpuflags);
1988                 }
1989                 goto retry;
1990         }
1991
1992         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1993                 if (ibs_enabled(vcpu)) {
1994                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1995                         atomic_andnot(CPUSTAT_IBS,
1996                                           &vcpu->arch.sie_block->cpuflags);
1997                 }
1998                 goto retry;
1999         }
2000
2001         /* nothing to do, just clear the request */
2002         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2003
2004         return 0;
2005 }
2006
2007 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2008 {
2009         struct kvm_vcpu *vcpu;
2010         int i;
2011
2012         mutex_lock(&kvm->lock);
2013         preempt_disable();
2014         kvm->arch.epoch = tod - get_tod_clock();
2015         kvm_s390_vcpu_block_all(kvm);
2016         kvm_for_each_vcpu(i, vcpu, kvm)
2017                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2018         kvm_s390_vcpu_unblock_all(kvm);
2019         preempt_enable();
2020         mutex_unlock(&kvm->lock);
2021 }
2022
2023 /**
2024  * kvm_arch_fault_in_page - fault-in guest page if necessary
2025  * @vcpu: The corresponding virtual cpu
2026  * @gpa: Guest physical address
2027  * @writable: Whether the page should be writable or not
2028  *
2029  * Make sure that a guest page has been faulted-in on the host.
2030  *
2031  * Return: Zero on success, negative error code otherwise.
2032  */
2033 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2034 {
2035         return gmap_fault(vcpu->arch.gmap, gpa,
2036                           writable ? FAULT_FLAG_WRITE : 0);
2037 }
2038
2039 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2040                                       unsigned long token)
2041 {
2042         struct kvm_s390_interrupt inti;
2043         struct kvm_s390_irq irq;
2044
2045         if (start_token) {
2046                 irq.u.ext.ext_params2 = token;
2047                 irq.type = KVM_S390_INT_PFAULT_INIT;
2048                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2049         } else {
2050                 inti.type = KVM_S390_INT_PFAULT_DONE;
2051                 inti.parm64 = token;
2052                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2053         }
2054 }
2055
2056 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2057                                      struct kvm_async_pf *work)
2058 {
2059         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2060         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2061 }
2062
2063 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2064                                  struct kvm_async_pf *work)
2065 {
2066         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2067         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2068 }
2069
2070 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2071                                struct kvm_async_pf *work)
2072 {
2073         /* s390 will always inject the page directly */
2074 }
2075
2076 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2077 {
2078         /*
2079          * s390 will always inject the page directly,
2080          * but we still want check_async_completion to clean up
2081          */
2082         return true;
2083 }
2084
2085 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2086 {
2087         hva_t hva;
2088         struct kvm_arch_async_pf arch;
2089         int rc;
2090
2091         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2092                 return 0;
2093         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2094             vcpu->arch.pfault_compare)
2095                 return 0;
2096         if (psw_extint_disabled(vcpu))
2097                 return 0;
2098         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2099                 return 0;
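        /* our reading: 0x200ul is the CR0 external-interruption subclass-mask
         * bit that gates pfault notifications; without it the guest could not
         * receive the init/completion interrupts, so don't bother */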
2100         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2101                 return 0;
2102         if (!vcpu->arch.gmap->pfault_enabled)
2103                 return 0;
2104
2105         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2106         hva += current->thread.gmap_addr & ~PAGE_MASK;
2107         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2108                 return 0;
2109
2110         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2111         return rc;
2112 }
2113
2114 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2115 {
2116         int rc, cpuflags;
2117
2118         /*
2119          * On s390 notifications for arriving pages will be delivered directly
2120          * to the guest but the housekeeping for completed pfaults is
2121          * handled outside the worker.
2122          */
2123         kvm_check_async_pf_completion(vcpu);
2124
2125         memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
2126
2127         if (need_resched())
2128                 schedule();
2129
2130         if (test_cpu_flag(CIF_MCCK_PENDING))
2131                 s390_handle_mcck();
2132
2133         if (!kvm_is_ucontrol(vcpu->kvm)) {
2134                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2135                 if (rc)
2136                         return rc;
2137         }
2138
2139         rc = kvm_s390_handle_requests(vcpu);
2140         if (rc)
2141                 return rc;
2142
2143         if (guestdbg_enabled(vcpu)) {
2144                 kvm_s390_backup_guest_per_regs(vcpu);
2145                 kvm_s390_patch_guest_per_regs(vcpu);
2146         }
2147
2148         vcpu->arch.sie_block->icptcode = 0;
2149         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2150         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2151         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2152
2153         return 0;
2154 }
2155
2156 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2157 {
2158         psw_t *psw = &vcpu->arch.sie_block->gpsw;
2159         u8 opcode;
2160         int rc;
2161
2162         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2163         trace_kvm_s390_sie_fault(vcpu);
2164
2165         /*
2166          * We want to inject an addressing exception, which is defined as a
2167          * suppressing or terminating exception. However, since we came here
2168          * by a DAT access exception, the PSW still points to the faulting
2169          * instruction since DAT exceptions are nullifying. So we've got
2170          * to look up the current opcode to get the length of the instruction
2171          * to be able to forward the PSW.
2172          */
2173         rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2174         if (rc)
2175                 return kvm_s390_inject_prog_cond(vcpu, rc);
2176         psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2177
2178         return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2179 }
2180
2181 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2182 {
2183         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2184                    vcpu->arch.sie_block->icptcode);
2185         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2186
2187         if (guestdbg_enabled(vcpu))
2188                 kvm_s390_restore_guest_per_regs(vcpu);
2189
2190         memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
2191
2192         if (vcpu->arch.sie_block->icptcode > 0) {
2193                 int rc = kvm_handle_sie_intercept(vcpu);
2194
2195                 if (rc != -EOPNOTSUPP)
2196                         return rc;
2197                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2198                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2199                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2200                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2201                 return -EREMOTE;
2202         } else if (exit_reason != -EFAULT) {
2203                 vcpu->stat.exit_null++;
2204                 return 0;
2205         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2206                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2207                 vcpu->run->s390_ucontrol.trans_exc_code =
2208                                                 current->thread.gmap_addr;
2209                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2210                 return -EREMOTE;
2211         } else if (current->thread.gmap_pfault) {
2212                 trace_kvm_s390_major_guest_pfault(vcpu);
2213                 current->thread.gmap_pfault = 0;
2214                 if (kvm_arch_setup_async_pf(vcpu))
2215                         return 0;
2216                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2217         }
2218         return vcpu_post_run_fault_in_sie(vcpu);
2219 }
2220
2221 static int __vcpu_run(struct kvm_vcpu *vcpu)
2222 {
2223         int rc, exit_reason;
2224
2225         /*
2226          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2227          * ning the guest), so that memslots (and other stuff) are protected
2228          */
2229         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2230
2231         do {
2232                 rc = vcpu_pre_run(vcpu);
2233                 if (rc)
2234                         break;
2235
2236                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2237                 /*
2238                  * As PF_VCPU will be used in the fault handler, there must be
2239                  * no uaccess between guest_enter and guest_exit.
2240                  */
2241                 local_irq_disable();
2242                 __kvm_guest_enter();
2243                 local_irq_enable();
2244                 exit_reason = sie64a(vcpu->arch.sie_block,
2245                                      vcpu->run->s.regs.gprs);
2246                 local_irq_disable();
2247                 __kvm_guest_exit();
2248                 local_irq_enable();
2249                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2250
2251                 rc = vcpu_post_run(vcpu, exit_reason);
2252         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2253
2254         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2255         return rc;
2256 }
2257
2258 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2259 {
2260         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2261         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2262         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2263                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2264         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2265                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2266                 /* some control register changes require a tlb flush */
2267                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2268         }
2269         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2270                 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2271                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2272                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2273                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2274                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2275         }
2276         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2277                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2278                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2279                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2280                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2281                         kvm_clear_async_pf_completion_queue(vcpu);
2282         }
2283         kvm_run->kvm_dirty_regs = 0;
2284 }
2285
2286 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2287 {
2288         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2289         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2290         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2291         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2292         kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2293         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2294         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2295         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2296         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2297         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2298         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2299         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2300 }
2301
2302 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2303 {
2304         int rc;
2305         sigset_t sigsaved;
2306
2307         if (guestdbg_exit_pending(vcpu)) {
2308                 kvm_s390_prepare_debug_exit(vcpu);
2309                 return 0;
2310         }
2311
2312         if (vcpu->sigset_active)
2313                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2314
2315         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2316                 kvm_s390_vcpu_start(vcpu);
2317         } else if (is_vcpu_stopped(vcpu)) {
2318                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2319                                    vcpu->vcpu_id);
2320                 return -EINVAL;
2321         }
2322
2323         sync_regs(vcpu, kvm_run);
2324
2325         might_fault();
2326         rc = __vcpu_run(vcpu);
2327
2328         if (signal_pending(current) && !rc) {
2329                 kvm_run->exit_reason = KVM_EXIT_INTR;
2330                 rc = -EINTR;
2331         }
2332
2333         if (guestdbg_exit_pending(vcpu) && !rc)  {
2334                 kvm_s390_prepare_debug_exit(vcpu);
2335                 rc = 0;
2336         }
2337
2338         if (rc == -EREMOTE) {
2339                 /* userspace support is needed, kvm_run has been prepared */
2340                 rc = 0;
2341         }
2342
2343         store_regs(vcpu, kvm_run);
2344
2345         if (vcpu->sigset_active)
2346                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2347
2348         vcpu->stat.exit_userspace++;
2349         return rc;
2350 }
2351
2352 /*
2353  * store status at address
2354  * we have two special cases:
2355  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2356  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2357  */
2358 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2359 {
2360         unsigned char archmode = 1;
2361         unsigned int px;
2362         u64 clkcomp;
2363         int rc;
2364
2365         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2366                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2367                         return -EFAULT;
2368                 gpa = SAVE_AREA_BASE;
2369         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2370                 if (write_guest_real(vcpu, 163, &archmode, 1))
2371                         return -EFAULT;
2372                 gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2373         }
2374         rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2375                              vcpu->arch.guest_fpregs.fprs, 128);
2376         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2377                               vcpu->run->s.regs.gprs, 128);
2378         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2379                               &vcpu->arch.sie_block->gpsw, 16);
2380         px = kvm_s390_get_prefix(vcpu);
2381         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2382                               &px, 4);
2383         rc |= write_guest_abs(vcpu,
2384                               gpa + offsetof(struct save_area, fp_ctrl_reg),
2385                               &vcpu->arch.guest_fpregs.fpc, 4);
2386         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2387                               &vcpu->arch.sie_block->todpr, 4);
2388         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2389                               &vcpu->arch.sie_block->cputm, 8);
2390         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2391         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2392                               &clkcomp, 8);
2393         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2394                               &vcpu->run->s.regs.acrs, 64);
2395         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2396                               &vcpu->arch.sie_block->gcr, 128);
2397         return rc ? -EFAULT : 0;
2398 }
2399
2400 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2401 {
2402         /*
2403          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2404          * copying in vcpu load/put. Let's update our copies before we save
2405          * them into the save area.
2406          */
2407         save_fpu_regs();
2408         if (test_kvm_facility(vcpu->kvm, 129)) {
2409                 /*
2410                  * If the vector extension is available, the vector registers
2411                  * which overlap with the floating-point registers are saved in
2412                  * the SIE-control block.  Hence, extract the floating-point
2413                  * registers and the FPC value and store them in the
2414                  * guest_fpregs structure.
2415                  */
2416                 vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
2417                 convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
2418                                  current->thread.fpu.vxrs);
2419         } else
2420                 save_fpu_to(&vcpu->arch.guest_fpregs);
2421         save_access_regs(vcpu->run->s.regs.acrs);
2422
2423         return kvm_s390_store_status_unloaded(vcpu, addr);
2424 }
2425
2426 /*
2427  * store additional status at address
2428  */
2429 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2430                                         unsigned long gpa)
2431 {
2432         /* Only bits 0-53 are used for address formation */
2433         if (!(gpa & ~0x3ff))
2434                 return 0;
2435
2436         return write_guest_abs(vcpu, gpa & ~0x3ff,
2437                                (void *)&vcpu->run->s.regs.vrs, 512);
2438 }
2439
2440 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2441 {
2442         if (!test_kvm_facility(vcpu->kvm, 129))
2443                 return 0;
2444
2445         /*
2446          * The guest VXRS are in the host VXRS due to the lazy
2447          * copying in vcpu load/put. We can simply call save_fpu_regs()
2448          * to save the current register state because we are in the
2449          * middle of a load/put cycle.
2450          *
2451          * Let's update our copies before we save it into the save area.
2452          */
2453         save_fpu_regs();
2454
2455         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2456 }
2457
2458 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2459 {
2460         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2461         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2462 }
2463
2464 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2465 {
2466         unsigned int i;
2467         struct kvm_vcpu *vcpu;
2468
2469         kvm_for_each_vcpu(i, vcpu, kvm) {
2470                 __disable_ibs_on_vcpu(vcpu);
2471         }
2472 }
2473
2474 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2475 {
2476         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2477         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2478 }
2479
2480 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2481 {
2482         int i, online_vcpus, started_vcpus = 0;
2483
2484         if (!is_vcpu_stopped(vcpu))
2485                 return;
2486
2487         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2488         /* Only one cpu at a time may enter/leave the STOPPED state. */
2489         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2490         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2491
2492         for (i = 0; i < online_vcpus; i++) {
2493                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2494                         started_vcpus++;
2495         }
2496
2497         if (started_vcpus == 0) {
2498                 /* we're the only active VCPU -> speed it up */
2499                 __enable_ibs_on_vcpu(vcpu);
2500         } else if (started_vcpus == 1) {
2501                 /*
2502                  * As we are starting a second VCPU, we have to disable
2503                  * the IBS facility on all VCPUs to remove potentially
2504                  * outstanding ENABLE requests.
2505                  */
2506                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2507         }
2508
2509         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2510         /*
2511          * Another VCPU might have used IBS while we were offline.
2512          * Let's play safe and flush the VCPU at startup.
2513          */
2514         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2515         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2516         return;
2517 }
2518
2519 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2520 {
2521         int i, online_vcpus, started_vcpus = 0;
2522         struct kvm_vcpu *started_vcpu = NULL;
2523
2524         if (is_vcpu_stopped(vcpu))
2525                 return;
2526
2527         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2528         /* Only one cpu at a time may enter/leave the STOPPED state. */
2529         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2530         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2531
2532         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2533         kvm_s390_clear_stop_irq(vcpu);
2534
2535         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2536         __disable_ibs_on_vcpu(vcpu);
2537
2538         for (i = 0; i < online_vcpus; i++) {
2539                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2540                         started_vcpus++;
2541                         started_vcpu = vcpu->kvm->vcpus[i];
2542                 }
2543         }
2544
2545         if (started_vcpus == 1) {
2546                 /*
2547                  * As we only have one VCPU left, we want to enable the
2548                  * IBS facility for that VCPU to speed it up.
2549                  */
2550                 __enable_ibs_on_vcpu(started_vcpu);
2551         }
2552
2553         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2554         return;
2555 }
2556
2557 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2558                                      struct kvm_enable_cap *cap)
2559 {
2560         int r;
2561
2562         if (cap->flags)
2563                 return -EINVAL;
2564
2565         switch (cap->cap) {
2566         case KVM_CAP_S390_CSS_SUPPORT:
2567                 if (!vcpu->kvm->arch.css_support) {
2568                         vcpu->kvm->arch.css_support = 1;
2569                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2570                         trace_kvm_s390_enable_css(vcpu->kvm);
2571                 }
2572                 r = 0;
2573                 break;
2574         default:
2575                 r = -EINVAL;
2576                 break;
2577         }
2578         return r;
2579 }
2580
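/*
 * Illustrative sketch, not part of this file: the handler below is reached
 * through the KVM_S390_MEM_OP vcpu ioctl, e.g. for a logical read:
 *
 *      char buf[256];
 *      struct kvm_s390_mem_op op = {
 *              .gaddr = guest_addr,
 *              .size  = sizeof(buf),
 *              .op    = KVM_S390_MEMOP_LOGICAL_READ,
 *              .buf   = (__u64)(unsigned long)buf,
 *              .ar    = 0,
 *      };
 *      ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * guest_addr and vcpu_fd stand in for values supplied by the caller.
 */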
2581 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2582                                   struct kvm_s390_mem_op *mop)
2583 {
2584         void __user *uaddr = (void __user *)mop->buf;
2585         void *tmpbuf = NULL;
2586         int r, srcu_idx;
2587         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2588                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2589
2590         if (mop->flags & ~supported_flags)
2591                 return -EINVAL;
2592
2593         if (mop->size > MEM_OP_MAX_SIZE)
2594                 return -E2BIG;
2595
2596         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2597                 tmpbuf = vmalloc(mop->size);
2598                 if (!tmpbuf)
2599                         return -ENOMEM;
2600         }
2601
2602         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2603
2604         switch (mop->op) {
2605         case KVM_S390_MEMOP_LOGICAL_READ:
2606                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2607                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2608                         break;
2609                 }
2610                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2611                 if (r == 0) {
2612                         if (copy_to_user(uaddr, tmpbuf, mop->size))
2613                                 r = -EFAULT;
2614                 }
2615                 break;
2616         case KVM_S390_MEMOP_LOGICAL_WRITE:
2617                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2618                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2619                         break;
2620                 }
2621                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2622                         r = -EFAULT;
2623                         break;
2624                 }
2625                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2626                 break;
2627         default:
2628                 r = -EINVAL;
2629         }
2630
2631         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2632
2633         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2634                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2635
2636         vfree(tmpbuf);
2637         return r;
2638 }
2639
2640 long kvm_arch_vcpu_ioctl(struct file *filp,
2641                          unsigned int ioctl, unsigned long arg)
2642 {
2643         struct kvm_vcpu *vcpu = filp->private_data;
2644         void __user *argp = (void __user *)arg;
2645         int idx;
2646         long r;
2647
2648         switch (ioctl) {
2649         case KVM_S390_IRQ: {
2650                 struct kvm_s390_irq s390irq;
2651
2652                 r = -EFAULT;
2653                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2654                         break;
2655                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2656                 break;
2657         }
2658         case KVM_S390_INTERRUPT: {
2659                 struct kvm_s390_interrupt s390int;
2660                 struct kvm_s390_irq s390irq;
2661
2662                 r = -EFAULT;
2663                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2664                         break;
2665                 if (s390int_to_s390irq(&s390int, &s390irq))
2666                         return -EINVAL;
2667                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2668                 break;
2669         }
2670         case KVM_S390_STORE_STATUS:
2671                 idx = srcu_read_lock(&vcpu->kvm->srcu);
2672                 r = kvm_s390_vcpu_store_status(vcpu, arg);
2673                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2674                 break;
2675         case KVM_S390_SET_INITIAL_PSW: {
2676                 psw_t psw;
2677
2678                 r = -EFAULT;
2679                 if (copy_from_user(&psw, argp, sizeof(psw)))
2680                         break;
2681                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2682                 break;
2683         }
2684         case KVM_S390_INITIAL_RESET:
2685                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2686                 break;
2687         case KVM_SET_ONE_REG:
2688         case KVM_GET_ONE_REG: {
2689                 struct kvm_one_reg reg;
2690                 r = -EFAULT;
2691                 if (copy_from_user(&reg, argp, sizeof(reg)))
2692                         break;
2693                 if (ioctl == KVM_SET_ONE_REG)
2694                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2695                 else
2696                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2697                 break;
2698         }
2699 #ifdef CONFIG_KVM_S390_UCONTROL
2700         case KVM_S390_UCAS_MAP: {
2701                 struct kvm_s390_ucas_mapping ucasmap;
2702
2703                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2704                         r = -EFAULT;
2705                         break;
2706                 }
2707
2708                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2709                         r = -EINVAL;
2710                         break;
2711                 }
2712
2713                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2714                                      ucasmap.vcpu_addr, ucasmap.length);
2715                 break;
2716         }
2717         case KVM_S390_UCAS_UNMAP: {
2718                 struct kvm_s390_ucas_mapping ucasmap;
2719
2720                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2721                         r = -EFAULT;
2722                         break;
2723                 }
2724
2725                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2726                         r = -EINVAL;
2727                         break;
2728                 }
2729
2730                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2731                         ucasmap.length);
2732                 break;
2733         }
2734 #endif
2735         case KVM_S390_VCPU_FAULT: {
2736                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2737                 break;
2738         }
2739         case KVM_ENABLE_CAP:
2740         {
2741                 struct kvm_enable_cap cap;
2742                 r = -EFAULT;
2743                 if (copy_from_user(&cap, argp, sizeof(cap)))
2744                         break;
2745                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2746                 break;
2747         }
2748         case KVM_S390_MEM_OP: {
2749                 struct kvm_s390_mem_op mem_op;
2750
2751                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2752                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2753                 else
2754                         r = -EFAULT;
2755                 break;
2756         }
2757         case KVM_S390_SET_IRQ_STATE: {
2758                 struct kvm_s390_irq_state irq_state;
2759
2760                 r = -EFAULT;
2761                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2762                         break;
2763                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2764                     irq_state.len == 0 ||
2765                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2766                         r = -EINVAL;
2767                         break;
2768                 }
2769                 r = kvm_s390_set_irq_state(vcpu,
2770                                            (void __user *) irq_state.buf,
2771                                            irq_state.len);
2772                 break;
2773         }
2774         case KVM_S390_GET_IRQ_STATE: {
2775                 struct kvm_s390_irq_state irq_state;
2776
2777                 r = -EFAULT;
2778                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2779                         break;
2780                 if (irq_state.len == 0) {
2781                         r = -EINVAL;
2782                         break;
2783                 }
2784                 r = kvm_s390_get_irq_state(vcpu,
2785                                            (__u8 __user *)  irq_state.buf,
2786                                            irq_state.len);
2787                 break;
2788         }
2789         default:
2790                 r = -ENOTTY;
2791         }
2792         return r;
2793 }
2794
2795 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2796 {
2797 #ifdef CONFIG_KVM_S390_UCONTROL
2798         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2799                  && (kvm_is_ucontrol(vcpu->kvm))) {
2800                 vmf->page = virt_to_page(vcpu->arch.sie_block);
2801                 get_page(vmf->page);
2802                 return 0;
2803         }
2804 #endif
2805         return VM_FAULT_SIGBUS;
2806 }
2807
2808 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2809                             unsigned long npages)
2810 {
2811         return 0;
2812 }
2813
2814 /* Section: memory related */
2815 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2816                                    struct kvm_memory_slot *memslot,
2817                                    const struct kvm_userspace_memory_region *mem,
2818                                    enum kvm_mr_change change)
2819 {
2820         /* A few sanity checks. Memory slots have to start and end at a
2821            segment boundary (1MB). The memory in userland may be fragmented
2822            into various different vmas. It is okay to mmap() and munmap()
2823            stuff in this slot after doing this call at any time. */
2824
2825         if (mem->userspace_addr & 0xffffful)
2826                 return -EINVAL;
2827
2828         if (mem->memory_size & 0xffffful)
2829                 return -EINVAL;
2830
2831         return 0;
2832 }
2833
2834 void kvm_arch_commit_memory_region(struct kvm *kvm,
2835                                 const struct kvm_userspace_memory_region *mem,
2836                                 const struct kvm_memory_slot *old,
2837                                 const struct kvm_memory_slot *new,
2838                                 enum kvm_mr_change change)
2839 {
2840         int rc;
2841
2842         /* If the basics of the memslot do not change, we do not want
2843          * to update the gmap. Every update causes several unnecessary
2844          * segment translation exceptions. This is usually handled just
2845          * fine by the normal fault handler + gmap, but it will also
2846          * cause faults on the prefix page of running guest CPUs.
2847          */
2848         if (old->userspace_addr == mem->userspace_addr &&
2849             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2850             old->npages * PAGE_SIZE == mem->memory_size)
2851                 return;
2852
2853         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2854                 mem->guest_phys_addr, mem->memory_size);
2855         if (rc)
2856                 pr_warn("failed to commit memory region\n");
2857         return;
2858 }
2859
2860 static int __init kvm_s390_init(void)
2861 {
2862         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2863 }
2864
2865 static void __exit kvm_s390_exit(void)
2866 {
2867         kvm_exit();
2868 }
2869
2870 module_init(kvm_s390_init);
2871 module_exit(kvm_s390_exit);
2872
2873 /*
2874  * Enable autoloading of the kvm module.
2875  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2876  * since x86 takes a different approach.
2877  */
2878 #include <linux/miscdevice.h>
2879 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2880 MODULE_ALIAS("devname:kvm");