]> git.karo-electronics.de Git - karo-tx-linux.git/blob - arch/s390/kvm/kvm-s390.c
KVM: s390: CMMA tracking, ESSA emulation, migration mode
[karo-tx-linux.git] / arch / s390 / kvm / kvm-s390.c
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33
34 #include <linux/string.h>
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include "kvm-s390.h"
47 #include "gaccess.h"
48
49 #define KMSG_COMPONENT "kvm-s390"
50 #undef pr_fmt
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56
57 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60                            (KVM_MAX_VCPUS + LOCAL_IRQS))
61
62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63
64 struct kvm_stats_debugfs_item debugfs_entries[] = {
65         { "userspace_handled", VCPU_STAT(exit_userspace) },
66         { "exit_null", VCPU_STAT(exit_null) },
67         { "exit_validity", VCPU_STAT(exit_validity) },
68         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
69         { "exit_external_request", VCPU_STAT(exit_external_request) },
70         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71         { "exit_instruction", VCPU_STAT(exit_instruction) },
72         { "exit_pei", VCPU_STAT(exit_pei) },
73         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
85         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
86         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
87         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
88         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
89         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
90         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
91         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
92         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
93         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
94         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
95         { "instruction_spx", VCPU_STAT(instruction_spx) },
96         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
97         { "instruction_stap", VCPU_STAT(instruction_stap) },
98         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
99         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
100         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
101         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
102         { "instruction_essa", VCPU_STAT(instruction_essa) },
103         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
104         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
105         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
106         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
107         { "instruction_sie", VCPU_STAT(instruction_sie) },
108         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
109         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
110         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
111         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
112         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
113         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
114         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
115         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
116         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
117         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
118         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
119         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
120         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
121         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
122         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
123         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
124         { "diagnose_10", VCPU_STAT(diagnose_10) },
125         { "diagnose_44", VCPU_STAT(diagnose_44) },
126         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
127         { "diagnose_258", VCPU_STAT(diagnose_258) },
128         { "diagnose_308", VCPU_STAT(diagnose_308) },
129         { "diagnose_500", VCPU_STAT(diagnose_500) },
130         { NULL }
131 };
132
133 /* allow nested virtualization in KVM (if enabled by user space) */
134 static int nested;
135 module_param(nested, int, S_IRUGO);
136 MODULE_PARM_DESC(nested, "Nested virtualization support");
137
138 /* upper facilities limit for kvm */
139 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
140
141 unsigned long kvm_s390_fac_list_mask_size(void)
142 {
143         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
144         return ARRAY_SIZE(kvm_s390_fac_list_mask);
145 }
146
147 /* available cpu features supported by kvm */
148 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
149 /* available subfunctions indicated via query / "test bit" */
150 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
151
152 static struct gmap_notifier gmap_notifier;
153 static struct gmap_notifier vsie_gmap_notifier;
154 debug_info_t *kvm_s390_dbf;
155
156 /* Section: not file related */
/*
 * Nothing to switch on: every s390 is virtualization enabled ;-)
 * Always returns 0.
 */
int kvm_arch_hardware_enable(void)
{
	return 0;
}
162
/* Forward declaration; installed as a gmap notifier callback in kvm_arch_hardware_setup(). */
static void kvm_gmap_notifier(struct gmap *gmap,
			      unsigned long start, unsigned long end);
165
166 /*
167  * This callback is executed during stop_machine(). All CPUs are therefore
168  * temporarily stopped. In order not to change guest behavior, we have to
169  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
170  * so a CPU won't be stopped while calculating with the epoch.
171  */
172 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
173                           void *v)
174 {
175         struct kvm *kvm;
176         struct kvm_vcpu *vcpu;
177         int i;
178         unsigned long long *delta = v;
179
180         list_for_each_entry(kvm, &vm_list, vm_list) {
181                 kvm->arch.epoch -= *delta;
182                 kvm_for_each_vcpu(i, vcpu, kvm) {
183                         vcpu->arch.sie_block->epoch -= *delta;
184                         if (vcpu->arch.cputm_enabled)
185                                 vcpu->arch.cputm_start += *delta;
186                         if (vcpu->arch.vsie_block)
187                                 vcpu->arch.vsie_block->epoch -= *delta;
188                 }
189         }
190         return NOTIFY_OK;
191 }
192
193 static struct notifier_block kvm_clock_notifier = {
194         .notifier_call = kvm_clock_sync,
195 };
196
197 int kvm_arch_hardware_setup(void)
198 {
199         gmap_notifier.notifier_call = kvm_gmap_notifier;
200         gmap_register_pte_notifier(&gmap_notifier);
201         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
202         gmap_register_pte_notifier(&vsie_gmap_notifier);
203         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
204                                        &kvm_clock_notifier);
205         return 0;
206 }
207
208 void kvm_arch_hardware_unsetup(void)
209 {
210         gmap_unregister_pte_notifier(&gmap_notifier);
211         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
212         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
213                                          &kvm_clock_notifier);
214 }
215
216 static void allow_cpu_feat(unsigned long nr)
217 {
218         set_bit_inv(nr, kvm_s390_available_cpu_feat);
219 }
220
221 static inline int plo_test_bit(unsigned char nr)
222 {
223         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
224         int cc;
225
226         asm volatile(
227                 /* Parameter registers are ignored for "test bit" */
228                 "       plo     0,0,0,0(0)\n"
229                 "       ipm     %0\n"
230                 "       srl     %0,28\n"
231                 : "=d" (cc)
232                 : "d" (r0)
233                 : "cc");
234         return cc == 0;
235 }
236
237 static void kvm_s390_cpu_feat_init(void)
238 {
239         int i;
240
241         for (i = 0; i < 256; ++i) {
242                 if (plo_test_bit(i))
243                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
244         }
245
246         if (test_facility(28)) /* TOD-clock steering */
247                 ptff(kvm_s390_available_subfunc.ptff,
248                      sizeof(kvm_s390_available_subfunc.ptff),
249                      PTFF_QAF);
250
251         if (test_facility(17)) { /* MSA */
252                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
253                               kvm_s390_available_subfunc.kmac);
254                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
255                               kvm_s390_available_subfunc.kmc);
256                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
257                               kvm_s390_available_subfunc.km);
258                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
259                               kvm_s390_available_subfunc.kimd);
260                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
261                               kvm_s390_available_subfunc.klmd);
262         }
263         if (test_facility(76)) /* MSA3 */
264                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
265                               kvm_s390_available_subfunc.pckmo);
266         if (test_facility(77)) { /* MSA4 */
267                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
268                               kvm_s390_available_subfunc.kmctr);
269                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
270                               kvm_s390_available_subfunc.kmf);
271                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
272                               kvm_s390_available_subfunc.kmo);
273                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
274                               kvm_s390_available_subfunc.pcc);
275         }
276         if (test_facility(57)) /* MSA5 */
277                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
278                               kvm_s390_available_subfunc.ppno);
279
280         if (test_facility(146)) /* MSA8 */
281                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
282                               kvm_s390_available_subfunc.kma);
283
284         if (MACHINE_HAS_ESOP)
285                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
286         /*
287          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
288          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
289          */
290         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
291             !test_facility(3) || !nested)
292                 return;
293         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
294         if (sclp.has_64bscao)
295                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
296         if (sclp.has_siif)
297                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
298         if (sclp.has_gpere)
299                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
300         if (sclp.has_gsls)
301                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
302         if (sclp.has_ib)
303                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
304         if (sclp.has_cei)
305                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
306         if (sclp.has_ibs)
307                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
308         if (sclp.has_kss)
309                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
310         /*
311          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
312          * all skey handling functions read/set the skey from the PGSTE
313          * instead of the real storage key.
314          *
315          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
316          * pages being detected as preserved although they are resident.
317          *
318          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
319          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
320          *
321          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
322          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
323          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
324          *
325          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
326          * cannot easily shadow the SCA because of the ipte lock.
327          */
328 }
329
330 int kvm_arch_init(void *opaque)
331 {
332         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
333         if (!kvm_s390_dbf)
334                 return -ENOMEM;
335
336         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
337                 debug_unregister(kvm_s390_dbf);
338                 return -ENOMEM;
339         }
340
341         kvm_s390_cpu_feat_init();
342
343         /* Register floating interrupt controller interface. */
344         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
345 }
346
347 void kvm_arch_exit(void)
348 {
349         debug_unregister(kvm_s390_dbf);
350 }
351
352 /* Section: device related */
353 long kvm_arch_dev_ioctl(struct file *filp,
354                         unsigned int ioctl, unsigned long arg)
355 {
356         if (ioctl == KVM_S390_ENABLE_SIE)
357                 return s390_enable_sie();
358         return -EINVAL;
359 }
360
361 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
362 {
363         int r;
364
365         switch (ext) {
366         case KVM_CAP_S390_PSW:
367         case KVM_CAP_S390_GMAP:
368         case KVM_CAP_SYNC_MMU:
369 #ifdef CONFIG_KVM_S390_UCONTROL
370         case KVM_CAP_S390_UCONTROL:
371 #endif
372         case KVM_CAP_ASYNC_PF:
373         case KVM_CAP_SYNC_REGS:
374         case KVM_CAP_ONE_REG:
375         case KVM_CAP_ENABLE_CAP:
376         case KVM_CAP_S390_CSS_SUPPORT:
377         case KVM_CAP_IOEVENTFD:
378         case KVM_CAP_DEVICE_CTRL:
379         case KVM_CAP_ENABLE_CAP_VM:
380         case KVM_CAP_S390_IRQCHIP:
381         case KVM_CAP_VM_ATTRIBUTES:
382         case KVM_CAP_MP_STATE:
383         case KVM_CAP_IMMEDIATE_EXIT:
384         case KVM_CAP_S390_INJECT_IRQ:
385         case KVM_CAP_S390_USER_SIGP:
386         case KVM_CAP_S390_USER_STSI:
387         case KVM_CAP_S390_SKEYS:
388         case KVM_CAP_S390_IRQ_STATE:
389         case KVM_CAP_S390_USER_INSTR0:
390         case KVM_CAP_S390_AIS:
391                 r = 1;
392                 break;
393         case KVM_CAP_S390_MEM_OP:
394                 r = MEM_OP_MAX_SIZE;
395                 break;
396         case KVM_CAP_NR_VCPUS:
397         case KVM_CAP_MAX_VCPUS:
398                 r = KVM_S390_BSCA_CPU_SLOTS;
399                 if (!kvm_s390_use_sca_entries())
400                         r = KVM_MAX_VCPUS;
401                 else if (sclp.has_esca && sclp.has_64bscao)
402                         r = KVM_S390_ESCA_CPU_SLOTS;
403                 break;
404         case KVM_CAP_NR_MEMSLOTS:
405                 r = KVM_USER_MEM_SLOTS;
406                 break;
407         case KVM_CAP_S390_COW:
408                 r = MACHINE_HAS_ESOP;
409                 break;
410         case KVM_CAP_S390_VECTOR_REGISTERS:
411                 r = MACHINE_HAS_VX;
412                 break;
413         case KVM_CAP_S390_RI:
414                 r = test_facility(64);
415                 break;
416         case KVM_CAP_S390_GS:
417                 r = test_facility(133);
418                 break;
419         default:
420                 r = 0;
421         }
422         return r;
423 }
424
425 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
426                                         struct kvm_memory_slot *memslot)
427 {
428         gfn_t cur_gfn, last_gfn;
429         unsigned long address;
430         struct gmap *gmap = kvm->arch.gmap;
431
432         /* Loop over all guest pages */
433         last_gfn = memslot->base_gfn + memslot->npages;
434         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
435                 address = gfn_to_hva_memslot(memslot, cur_gfn);
436
437                 if (test_and_clear_guest_dirty(gmap->mm, address))
438                         mark_page_dirty(kvm, cur_gfn);
439                 if (fatal_signal_pending(current))
440                         return;
441                 cond_resched();
442         }
443 }
444
445 /* Section: vm related */
/* Forward declaration; the definition is not located in this part of the file. */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
447
448 /*
449  * Get (and clear) the dirty memory log for a memory slot.
450  */
451 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
452                                struct kvm_dirty_log *log)
453 {
454         int r;
455         unsigned long n;
456         struct kvm_memslots *slots;
457         struct kvm_memory_slot *memslot;
458         int is_dirty = 0;
459
460         if (kvm_is_ucontrol(kvm))
461                 return -EINVAL;
462
463         mutex_lock(&kvm->slots_lock);
464
465         r = -EINVAL;
466         if (log->slot >= KVM_USER_MEM_SLOTS)
467                 goto out;
468
469         slots = kvm_memslots(kvm);
470         memslot = id_to_memslot(slots, log->slot);
471         r = -ENOENT;
472         if (!memslot->dirty_bitmap)
473                 goto out;
474
475         kvm_s390_sync_dirty_log(kvm, memslot);
476         r = kvm_get_dirty_log(kvm, log, &is_dirty);
477         if (r)
478                 goto out;
479
480         /* Clear the dirty log */
481         if (is_dirty) {
482                 n = kvm_dirty_bitmap_bytes(memslot);
483                 memset(memslot->dirty_bitmap, 0, n);
484         }
485         r = 0;
486 out:
487         mutex_unlock(&kvm->slots_lock);
488         return r;
489 }
490
491 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
492 {
493         unsigned int i;
494         struct kvm_vcpu *vcpu;
495
496         kvm_for_each_vcpu(i, vcpu, kvm) {
497                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
498         }
499 }
500
501 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
502 {
503         int r;
504
505         if (cap->flags)
506                 return -EINVAL;
507
508         switch (cap->cap) {
509         case KVM_CAP_S390_IRQCHIP:
510                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
511                 kvm->arch.use_irqchip = 1;
512                 r = 0;
513                 break;
514         case KVM_CAP_S390_USER_SIGP:
515                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
516                 kvm->arch.user_sigp = 1;
517                 r = 0;
518                 break;
519         case KVM_CAP_S390_VECTOR_REGISTERS:
520                 mutex_lock(&kvm->lock);
521                 if (kvm->created_vcpus) {
522                         r = -EBUSY;
523                 } else if (MACHINE_HAS_VX) {
524                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
525                         set_kvm_facility(kvm->arch.model.fac_list, 129);
526                         if (test_facility(134)) {
527                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
528                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
529                         }
530                         if (test_facility(135)) {
531                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
532                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
533                         }
534                         r = 0;
535                 } else
536                         r = -EINVAL;
537                 mutex_unlock(&kvm->lock);
538                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
539                          r ? "(not available)" : "(success)");
540                 break;
541         case KVM_CAP_S390_RI:
542                 r = -EINVAL;
543                 mutex_lock(&kvm->lock);
544                 if (kvm->created_vcpus) {
545                         r = -EBUSY;
546                 } else if (test_facility(64)) {
547                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
548                         set_kvm_facility(kvm->arch.model.fac_list, 64);
549                         r = 0;
550                 }
551                 mutex_unlock(&kvm->lock);
552                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
553                          r ? "(not available)" : "(success)");
554                 break;
555         case KVM_CAP_S390_AIS:
556                 mutex_lock(&kvm->lock);
557                 if (kvm->created_vcpus) {
558                         r = -EBUSY;
559                 } else {
560                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
561                         set_kvm_facility(kvm->arch.model.fac_list, 72);
562                         kvm->arch.float_int.ais_enabled = 1;
563                         r = 0;
564                 }
565                 mutex_unlock(&kvm->lock);
566                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
567                          r ? "(not available)" : "(success)");
568                 break;
569         case KVM_CAP_S390_GS:
570                 r = -EINVAL;
571                 mutex_lock(&kvm->lock);
572                 if (atomic_read(&kvm->online_vcpus)) {
573                         r = -EBUSY;
574                 } else if (test_facility(133)) {
575                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
576                         set_kvm_facility(kvm->arch.model.fac_list, 133);
577                         r = 0;
578                 }
579                 mutex_unlock(&kvm->lock);
580                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
581                          r ? "(not available)" : "(success)");
582                 break;
583         case KVM_CAP_S390_USER_STSI:
584                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
585                 kvm->arch.user_stsi = 1;
586                 r = 0;
587                 break;
588         case KVM_CAP_S390_USER_INSTR0:
589                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
590                 kvm->arch.user_instr0 = 1;
591                 icpt_operexc_on_all_vcpus(kvm);
592                 r = 0;
593                 break;
594         default:
595                 r = -EINVAL;
596                 break;
597         }
598         return r;
599 }
600
601 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
602 {
603         int ret;
604
605         switch (attr->attr) {
606         case KVM_S390_VM_MEM_LIMIT_SIZE:
607                 ret = 0;
608                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
609                          kvm->arch.mem_limit);
610                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
611                         ret = -EFAULT;
612                 break;
613         default:
614                 ret = -ENXIO;
615                 break;
616         }
617         return ret;
618 }
619
620 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
621 {
622         int ret;
623         unsigned int idx;
624         switch (attr->attr) {
625         case KVM_S390_VM_MEM_ENABLE_CMMA:
626                 ret = -ENXIO;
627                 if (!sclp.has_cmma)
628                         break;
629
630                 ret = -EBUSY;
631                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
632                 mutex_lock(&kvm->lock);
633                 if (!kvm->created_vcpus) {
634                         kvm->arch.use_cmma = 1;
635                         ret = 0;
636                 }
637                 mutex_unlock(&kvm->lock);
638                 break;
639         case KVM_S390_VM_MEM_CLR_CMMA:
640                 ret = -ENXIO;
641                 if (!sclp.has_cmma)
642                         break;
643                 ret = -EINVAL;
644                 if (!kvm->arch.use_cmma)
645                         break;
646
647                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
648                 mutex_lock(&kvm->lock);
649                 idx = srcu_read_lock(&kvm->srcu);
650                 s390_reset_cmma(kvm->arch.gmap->mm);
651                 srcu_read_unlock(&kvm->srcu, idx);
652                 mutex_unlock(&kvm->lock);
653                 ret = 0;
654                 break;
655         case KVM_S390_VM_MEM_LIMIT_SIZE: {
656                 unsigned long new_limit;
657
658                 if (kvm_is_ucontrol(kvm))
659                         return -EINVAL;
660
661                 if (get_user(new_limit, (u64 __user *)attr->addr))
662                         return -EFAULT;
663
664                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
665                     new_limit > kvm->arch.mem_limit)
666                         return -E2BIG;
667
668                 if (!new_limit)
669                         return -EINVAL;
670
671                 /* gmap_create takes last usable address */
672                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
673                         new_limit -= 1;
674
675                 ret = -EBUSY;
676                 mutex_lock(&kvm->lock);
677                 if (!kvm->created_vcpus) {
678                         /* gmap_create will round the limit up */
679                         struct gmap *new = gmap_create(current->mm, new_limit);
680
681                         if (!new) {
682                                 ret = -ENOMEM;
683                         } else {
684                                 gmap_remove(kvm->arch.gmap);
685                                 new->private = kvm;
686                                 kvm->arch.gmap = new;
687                                 ret = 0;
688                         }
689                 }
690                 mutex_unlock(&kvm->lock);
691                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
692                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
693                          (void *) kvm->arch.gmap->asce);
694                 break;
695         }
696         default:
697                 ret = -ENXIO;
698                 break;
699         }
700         return ret;
701 }
702
/* Forward declaration; called for every vcpu by kvm_s390_vm_set_crypto(). */
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
704
705 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
706 {
707         struct kvm_vcpu *vcpu;
708         int i;
709
710         if (!test_kvm_facility(kvm, 76))
711                 return -EINVAL;
712
713         mutex_lock(&kvm->lock);
714         switch (attr->attr) {
715         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
716                 get_random_bytes(
717                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
718                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
719                 kvm->arch.crypto.aes_kw = 1;
720                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
721                 break;
722         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
723                 get_random_bytes(
724                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
725                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
726                 kvm->arch.crypto.dea_kw = 1;
727                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
728                 break;
729         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
730                 kvm->arch.crypto.aes_kw = 0;
731                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
732                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
733                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
734                 break;
735         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
736                 kvm->arch.crypto.dea_kw = 0;
737                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
738                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
739                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
740                 break;
741         default:
742                 mutex_unlock(&kvm->lock);
743                 return -ENXIO;
744         }
745
746         kvm_for_each_vcpu(i, vcpu, kvm) {
747                 kvm_s390_vcpu_crypto_setup(vcpu);
748                 exit_sie(vcpu);
749         }
750         mutex_unlock(&kvm->lock);
751         return 0;
752 }
753
754 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
755 {
756         int cx;
757         struct kvm_vcpu *vcpu;
758
759         kvm_for_each_vcpu(cx, vcpu, kvm)
760                 kvm_s390_sync_request(req, vcpu);
761 }
762
763 /*
764  * Must be called with kvm->srcu held to avoid races on memslots, and with
765  * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
766  */
767 static int kvm_s390_vm_start_migration(struct kvm *kvm)
768 {
769         struct kvm_s390_migration_state *mgs;
770         struct kvm_memory_slot *ms;
771         /* should be the only one */
772         struct kvm_memslots *slots;
773         unsigned long ram_pages;
774         int slotnr;
775
776         /* migration mode already enabled */
777         if (kvm->arch.migration_state)
778                 return 0;
779
780         slots = kvm_memslots(kvm);
781         if (!slots || !slots->used_slots)
782                 return -EINVAL;
783
784         mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
785         if (!mgs)
786                 return -ENOMEM;
787         kvm->arch.migration_state = mgs;
788
789         if (kvm->arch.use_cmma) {
790                 /*
791                  * Get the last slot. They should be sorted by base_gfn, so the
792                  * last slot is also the one at the end of the address space.
793                  * We have verified above that at least one slot is present.
794                  */
795                 ms = slots->memslots + slots->used_slots - 1;
796                 /* round up so we only use full longs */
797                 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
798                 /* allocate enough bytes to store all the bits */
799                 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
800                 if (!mgs->pgste_bitmap) {
801                         kfree(mgs);
802                         kvm->arch.migration_state = NULL;
803                         return -ENOMEM;
804                 }
805
806                 mgs->bitmap_size = ram_pages;
807                 atomic64_set(&mgs->dirty_pages, ram_pages);
808                 /* mark all the pages in active slots as dirty */
809                 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
810                         ms = slots->memslots + slotnr;
811                         bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
812                 }
813
814                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
815         }
816         return 0;
817 }
818
819 /*
820  * Must be called with kvm->lock to avoid races with ourselves and
821  * kvm_s390_vm_start_migration.
822  */
823 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
824 {
825         struct kvm_s390_migration_state *mgs;
826
827         /* migration mode already disabled */
828         if (!kvm->arch.migration_state)
829                 return 0;
830         mgs = kvm->arch.migration_state;
831         kvm->arch.migration_state = NULL;
832
833         if (kvm->arch.use_cmma) {
834                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
835                 vfree(mgs->pgste_bitmap);
836         }
837         kfree(mgs);
838         return 0;
839 }
840
841 static int kvm_s390_vm_set_migration(struct kvm *kvm,
842                                      struct kvm_device_attr *attr)
843 {
844         int idx, res = -ENXIO;
845
846         mutex_lock(&kvm->lock);
847         switch (attr->attr) {
848         case KVM_S390_VM_MIGRATION_START:
849                 idx = srcu_read_lock(&kvm->srcu);
850                 res = kvm_s390_vm_start_migration(kvm);
851                 srcu_read_unlock(&kvm->srcu, idx);
852                 break;
853         case KVM_S390_VM_MIGRATION_STOP:
854                 res = kvm_s390_vm_stop_migration(kvm);
855                 break;
856         default:
857                 break;
858         }
859         mutex_unlock(&kvm->lock);
860
861         return res;
862 }
863
864 static int kvm_s390_vm_get_migration(struct kvm *kvm,
865                                      struct kvm_device_attr *attr)
866 {
867         u64 mig = (kvm->arch.migration_state != NULL);
868
869         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
870                 return -ENXIO;
871
872         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
873                 return -EFAULT;
874         return 0;
875 }
876
877 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
878 {
879         u8 gtod_high;
880
881         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
882                                            sizeof(gtod_high)))
883                 return -EFAULT;
884
885         if (gtod_high != 0)
886                 return -EINVAL;
887         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
888
889         return 0;
890 }
891
892 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
893 {
894         u64 gtod;
895
896         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
897                 return -EFAULT;
898
899         kvm_s390_set_tod_clock(kvm, gtod);
900         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
901         return 0;
902 }
903
904 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
905 {
906         int ret;
907
908         if (attr->flags)
909                 return -EINVAL;
910
911         switch (attr->attr) {
912         case KVM_S390_VM_TOD_HIGH:
913                 ret = kvm_s390_set_tod_high(kvm, attr);
914                 break;
915         case KVM_S390_VM_TOD_LOW:
916                 ret = kvm_s390_set_tod_low(kvm, attr);
917                 break;
918         default:
919                 ret = -ENXIO;
920                 break;
921         }
922         return ret;
923 }
924
925 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927         u8 gtod_high = 0;
928
929         if (copy_to_user((void __user *)attr->addr, &gtod_high,
930                                          sizeof(gtod_high)))
931                 return -EFAULT;
932         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
933
934         return 0;
935 }
936
937 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
938 {
939         u64 gtod;
940
941         gtod = kvm_s390_get_tod_clock_fast(kvm);
942         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
943                 return -EFAULT;
944         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
945
946         return 0;
947 }
948
949 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
950 {
951         int ret;
952
953         if (attr->flags)
954                 return -EINVAL;
955
956         switch (attr->attr) {
957         case KVM_S390_VM_TOD_HIGH:
958                 ret = kvm_s390_get_tod_high(kvm, attr);
959                 break;
960         case KVM_S390_VM_TOD_LOW:
961                 ret = kvm_s390_get_tod_low(kvm, attr);
962                 break;
963         default:
964                 ret = -ENXIO;
965                 break;
966         }
967         return ret;
968 }
969
970 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
971 {
972         struct kvm_s390_vm_cpu_processor *proc;
973         u16 lowest_ibc, unblocked_ibc;
974         int ret = 0;
975
976         mutex_lock(&kvm->lock);
977         if (kvm->created_vcpus) {
978                 ret = -EBUSY;
979                 goto out;
980         }
981         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
982         if (!proc) {
983                 ret = -ENOMEM;
984                 goto out;
985         }
986         if (!copy_from_user(proc, (void __user *)attr->addr,
987                             sizeof(*proc))) {
988                 kvm->arch.model.cpuid = proc->cpuid;
989                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
990                 unblocked_ibc = sclp.ibc & 0xfff;
991                 if (lowest_ibc && proc->ibc) {
992                         if (proc->ibc > unblocked_ibc)
993                                 kvm->arch.model.ibc = unblocked_ibc;
994                         else if (proc->ibc < lowest_ibc)
995                                 kvm->arch.model.ibc = lowest_ibc;
996                         else
997                                 kvm->arch.model.ibc = proc->ibc;
998                 }
999                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1000                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1001                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1002                          kvm->arch.model.ibc,
1003                          kvm->arch.model.cpuid);
1004                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1005                          kvm->arch.model.fac_list[0],
1006                          kvm->arch.model.fac_list[1],
1007                          kvm->arch.model.fac_list[2]);
1008         } else
1009                 ret = -EFAULT;
1010         kfree(proc);
1011 out:
1012         mutex_unlock(&kvm->lock);
1013         return ret;
1014 }
1015
1016 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1017                                        struct kvm_device_attr *attr)
1018 {
1019         struct kvm_s390_vm_cpu_feat data;
1020         int ret = -EBUSY;
1021
1022         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1023                 return -EFAULT;
1024         if (!bitmap_subset((unsigned long *) data.feat,
1025                            kvm_s390_available_cpu_feat,
1026                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1027                 return -EINVAL;
1028
1029         mutex_lock(&kvm->lock);
1030         if (!atomic_read(&kvm->online_vcpus)) {
1031                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1032                             KVM_S390_VM_CPU_FEAT_NR_BITS);
1033                 ret = 0;
1034         }
1035         mutex_unlock(&kvm->lock);
1036         return ret;
1037 }
1038
/*
 * Configuring subfunctions is not possible yet, so this always fails with
 * -ENXIO and touches neither @kvm nor @attr.
 */
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once kernel and hardware support this, the subfunctions have to be
	 * stored in kvm->arch together with a note that user space
	 * configured them.
	 */
	const int unsupported = -ENXIO;

	return unsupported;
}
1048
1049 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1050 {
1051         int ret = -ENXIO;
1052
1053         switch (attr->attr) {
1054         case KVM_S390_VM_CPU_PROCESSOR:
1055                 ret = kvm_s390_set_processor(kvm, attr);
1056                 break;
1057         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1058                 ret = kvm_s390_set_processor_feat(kvm, attr);
1059                 break;
1060         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1061                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1062                 break;
1063         }
1064         return ret;
1065 }
1066
1067 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1068 {
1069         struct kvm_s390_vm_cpu_processor *proc;
1070         int ret = 0;
1071
1072         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1073         if (!proc) {
1074                 ret = -ENOMEM;
1075                 goto out;
1076         }
1077         proc->cpuid = kvm->arch.model.cpuid;
1078         proc->ibc = kvm->arch.model.ibc;
1079         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1080                S390_ARCH_FAC_LIST_SIZE_BYTE);
1081         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1082                  kvm->arch.model.ibc,
1083                  kvm->arch.model.cpuid);
1084         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1085                  kvm->arch.model.fac_list[0],
1086                  kvm->arch.model.fac_list[1],
1087                  kvm->arch.model.fac_list[2]);
1088         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1089                 ret = -EFAULT;
1090         kfree(proc);
1091 out:
1092         return ret;
1093 }
1094
1095 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1096 {
1097         struct kvm_s390_vm_cpu_machine *mach;
1098         int ret = 0;
1099
1100         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1101         if (!mach) {
1102                 ret = -ENOMEM;
1103                 goto out;
1104         }
1105         get_cpu_id((struct cpuid *) &mach->cpuid);
1106         mach->ibc = sclp.ibc;
1107         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1108                S390_ARCH_FAC_LIST_SIZE_BYTE);
1109         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1110                sizeof(S390_lowcore.stfle_fac_list));
1111         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1112                  kvm->arch.model.ibc,
1113                  kvm->arch.model.cpuid);
1114         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1115                  mach->fac_mask[0],
1116                  mach->fac_mask[1],
1117                  mach->fac_mask[2]);
1118         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1119                  mach->fac_list[0],
1120                  mach->fac_list[1],
1121                  mach->fac_list[2]);
1122         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1123                 ret = -EFAULT;
1124         kfree(mach);
1125 out:
1126         return ret;
1127 }
1128
1129 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1130                                        struct kvm_device_attr *attr)
1131 {
1132         struct kvm_s390_vm_cpu_feat data;
1133
1134         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1135                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1136         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1137                 return -EFAULT;
1138         return 0;
1139 }
1140
1141 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1142                                      struct kvm_device_attr *attr)
1143 {
1144         struct kvm_s390_vm_cpu_feat data;
1145
1146         bitmap_copy((unsigned long *) data.feat,
1147                     kvm_s390_available_cpu_feat,
1148                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1149         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1150                 return -EFAULT;
1151         return 0;
1152 }
1153
/*
 * Subfunctions cannot be configured yet, so there is nothing to report:
 * this always fails with -ENXIO and touches neither @kvm nor @attr.
 */
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once subfunctions become configurable (kernel + hw support), check
	 * here whether user space already set them and, if so, copy them out
	 * of kvm->arch.
	 */
	const int unsupported = -ENXIO;

	return unsupported;
}
1164
1165 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1166                                         struct kvm_device_attr *attr)
1167 {
1168         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1169             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1170                 return -EFAULT;
1171         return 0;
1172 }
1173 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1174 {
1175         int ret = -ENXIO;
1176
1177         switch (attr->attr) {
1178         case KVM_S390_VM_CPU_PROCESSOR:
1179                 ret = kvm_s390_get_processor(kvm, attr);
1180                 break;
1181         case KVM_S390_VM_CPU_MACHINE:
1182                 ret = kvm_s390_get_machine(kvm, attr);
1183                 break;
1184         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1185                 ret = kvm_s390_get_processor_feat(kvm, attr);
1186                 break;
1187         case KVM_S390_VM_CPU_MACHINE_FEAT:
1188                 ret = kvm_s390_get_machine_feat(kvm, attr);
1189                 break;
1190         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1191                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1192                 break;
1193         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1194                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1195                 break;
1196         }
1197         return ret;
1198 }
1199
1200 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1201 {
1202         int ret;
1203
1204         switch (attr->group) {
1205         case KVM_S390_VM_MEM_CTRL:
1206                 ret = kvm_s390_set_mem_control(kvm, attr);
1207                 break;
1208         case KVM_S390_VM_TOD:
1209                 ret = kvm_s390_set_tod(kvm, attr);
1210                 break;
1211         case KVM_S390_VM_CPU_MODEL:
1212                 ret = kvm_s390_set_cpu_model(kvm, attr);
1213                 break;
1214         case KVM_S390_VM_CRYPTO:
1215                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1216                 break;
1217         case KVM_S390_VM_MIGRATION:
1218                 ret = kvm_s390_vm_set_migration(kvm, attr);
1219                 break;
1220         default:
1221                 ret = -ENXIO;
1222                 break;
1223         }
1224
1225         return ret;
1226 }
1227
1228 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1229 {
1230         int ret;
1231
1232         switch (attr->group) {
1233         case KVM_S390_VM_MEM_CTRL:
1234                 ret = kvm_s390_get_mem_control(kvm, attr);
1235                 break;
1236         case KVM_S390_VM_TOD:
1237                 ret = kvm_s390_get_tod(kvm, attr);
1238                 break;
1239         case KVM_S390_VM_CPU_MODEL:
1240                 ret = kvm_s390_get_cpu_model(kvm, attr);
1241                 break;
1242         case KVM_S390_VM_MIGRATION:
1243                 ret = kvm_s390_vm_get_migration(kvm, attr);
1244                 break;
1245         default:
1246                 ret = -ENXIO;
1247                 break;
1248         }
1249
1250         return ret;
1251 }
1252
1253 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1254 {
1255         int ret;
1256
1257         switch (attr->group) {
1258         case KVM_S390_VM_MEM_CTRL:
1259                 switch (attr->attr) {
1260                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1261                 case KVM_S390_VM_MEM_CLR_CMMA:
1262                         ret = sclp.has_cmma ? 0 : -ENXIO;
1263                         break;
1264                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1265                         ret = 0;
1266                         break;
1267                 default:
1268                         ret = -ENXIO;
1269                         break;
1270                 }
1271                 break;
1272         case KVM_S390_VM_TOD:
1273                 switch (attr->attr) {
1274                 case KVM_S390_VM_TOD_LOW:
1275                 case KVM_S390_VM_TOD_HIGH:
1276                         ret = 0;
1277                         break;
1278                 default:
1279                         ret = -ENXIO;
1280                         break;
1281                 }
1282                 break;
1283         case KVM_S390_VM_CPU_MODEL:
1284                 switch (attr->attr) {
1285                 case KVM_S390_VM_CPU_PROCESSOR:
1286                 case KVM_S390_VM_CPU_MACHINE:
1287                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1288                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1289                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1290                         ret = 0;
1291                         break;
1292                 /* configuring subfunctions is not supported yet */
1293                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1294                 default:
1295                         ret = -ENXIO;
1296                         break;
1297                 }
1298                 break;
1299         case KVM_S390_VM_CRYPTO:
1300                 switch (attr->attr) {
1301                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1302                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1303                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1304                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1305                         ret = 0;
1306                         break;
1307                 default:
1308                         ret = -ENXIO;
1309                         break;
1310                 }
1311                 break;
1312         case KVM_S390_VM_MIGRATION:
1313                 ret = 0;
1314                 break;
1315         default:
1316                 ret = -ENXIO;
1317                 break;
1318         }
1319
1320         return ret;
1321 }
1322
1323 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1324 {
1325         uint8_t *keys;
1326         uint64_t hva;
1327         int i, r = 0;
1328
1329         if (args->flags != 0)
1330                 return -EINVAL;
1331
1332         /* Is this guest using storage keys? */
1333         if (!mm_use_skey(current->mm))
1334                 return KVM_S390_GET_SKEYS_NONE;
1335
1336         /* Enforce sane limit on memory allocation */
1337         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1338                 return -EINVAL;
1339
1340         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1341         if (!keys)
1342                 return -ENOMEM;
1343
1344         down_read(&current->mm->mmap_sem);
1345         for (i = 0; i < args->count; i++) {
1346                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1347                 if (kvm_is_error_hva(hva)) {
1348                         r = -EFAULT;
1349                         break;
1350                 }
1351
1352                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1353                 if (r)
1354                         break;
1355         }
1356         up_read(&current->mm->mmap_sem);
1357
1358         if (!r) {
1359                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1360                                  sizeof(uint8_t) * args->count);
1361                 if (r)
1362                         r = -EFAULT;
1363         }
1364
1365         kvfree(keys);
1366         return r;
1367 }
1368
1369 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1370 {
1371         uint8_t *keys;
1372         uint64_t hva;
1373         int i, r = 0;
1374
1375         if (args->flags != 0)
1376                 return -EINVAL;
1377
1378         /* Enforce sane limit on memory allocation */
1379         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1380                 return -EINVAL;
1381
1382         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1383         if (!keys)
1384                 return -ENOMEM;
1385
1386         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1387                            sizeof(uint8_t) * args->count);
1388         if (r) {
1389                 r = -EFAULT;
1390                 goto out;
1391         }
1392
1393         /* Enable storage key handling for the guest */
1394         r = s390_enable_skey();
1395         if (r)
1396                 goto out;
1397
1398         down_read(&current->mm->mmap_sem);
1399         for (i = 0; i < args->count; i++) {
1400                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1401                 if (kvm_is_error_hva(hva)) {
1402                         r = -EFAULT;
1403                         break;
1404                 }
1405
1406                 /* Lowest order bit is reserved */
1407                 if (keys[i] & 0x01) {
1408                         r = -EINVAL;
1409                         break;
1410                 }
1411
1412                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1413                 if (r)
1414                         break;
1415         }
1416         up_read(&current->mm->mmap_sem);
1417 out:
1418         kvfree(keys);
1419         return r;
1420 }
1421
1422 long kvm_arch_vm_ioctl(struct file *filp,
1423                        unsigned int ioctl, unsigned long arg)
1424 {
1425         struct kvm *kvm = filp->private_data;
1426         void __user *argp = (void __user *)arg;
1427         struct kvm_device_attr attr;
1428         int r;
1429
1430         switch (ioctl) {
1431         case KVM_S390_INTERRUPT: {
1432                 struct kvm_s390_interrupt s390int;
1433
1434                 r = -EFAULT;
1435                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1436                         break;
1437                 r = kvm_s390_inject_vm(kvm, &s390int);
1438                 break;
1439         }
1440         case KVM_ENABLE_CAP: {
1441                 struct kvm_enable_cap cap;
1442                 r = -EFAULT;
1443                 if (copy_from_user(&cap, argp, sizeof(cap)))
1444                         break;
1445                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1446                 break;
1447         }
1448         case KVM_CREATE_IRQCHIP: {
1449                 struct kvm_irq_routing_entry routing;
1450
1451                 r = -EINVAL;
1452                 if (kvm->arch.use_irqchip) {
1453                         /* Set up dummy routing. */
1454                         memset(&routing, 0, sizeof(routing));
1455                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1456                 }
1457                 break;
1458         }
1459         case KVM_SET_DEVICE_ATTR: {
1460                 r = -EFAULT;
1461                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1462                         break;
1463                 r = kvm_s390_vm_set_attr(kvm, &attr);
1464                 break;
1465         }
1466         case KVM_GET_DEVICE_ATTR: {
1467                 r = -EFAULT;
1468                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1469                         break;
1470                 r = kvm_s390_vm_get_attr(kvm, &attr);
1471                 break;
1472         }
1473         case KVM_HAS_DEVICE_ATTR: {
1474                 r = -EFAULT;
1475                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1476                         break;
1477                 r = kvm_s390_vm_has_attr(kvm, &attr);
1478                 break;
1479         }
1480         case KVM_S390_GET_SKEYS: {
1481                 struct kvm_s390_skeys args;
1482
1483                 r = -EFAULT;
1484                 if (copy_from_user(&args, argp,
1485                                    sizeof(struct kvm_s390_skeys)))
1486                         break;
1487                 r = kvm_s390_get_skeys(kvm, &args);
1488                 break;
1489         }
1490         case KVM_S390_SET_SKEYS: {
1491                 struct kvm_s390_skeys args;
1492
1493                 r = -EFAULT;
1494                 if (copy_from_user(&args, argp,
1495                                    sizeof(struct kvm_s390_skeys)))
1496                         break;
1497                 r = kvm_s390_set_skeys(kvm, &args);
1498                 break;
1499         }
1500         default:
1501                 r = -ENOTTY;
1502         }
1503
1504         return r;
1505 }
1506
/*
 * Issue PQAP(QCI) and return the resulting condition code.
 *
 * @config: caller-provided buffer; its first 128 bytes are zeroed before the
 *          instruction is executed and its address is passed in register 2.
 *
 * The return value is cc as extracted by ipm/srl. cc starts out as 0, so
 * when the ipm/srl pair is skipped via the exception table entry the
 * function returns 0.
 * NOTE(review): presumably the exception table entry covers machines
 * without the instruction, in which case @config would stay all-zero -
 * confirm.
 */
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		/* register 0: function code, register 2: buffer address */
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		/* fetch the program mask and shift the cc down to bits 0-1 */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}
1529 static int kvm_s390_apxa_installed(void)
1530 {
1531         u8 config[128];
1532         int cc;
1533
1534         if (test_facility(12)) {
1535                 cc = kvm_s390_query_ap_config(config);
1536
1537                 if (cc)
1538                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1539                 else
1540                         return config[0] & 0x40;
1541         }
1542
1543         return 0;
1544 }
1545
1546 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1547 {
1548         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1549
1550         if (kvm_s390_apxa_installed())
1551                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1552         else
1553                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1554 }
1555
1556 static u64 kvm_s390_get_initial_cpuid(void)
1557 {
1558         struct cpuid cpuid;
1559
1560         get_cpu_id(&cpuid);
1561         cpuid.version = 0xff;
1562         return *((u64 *) &cpuid);
1563 }
1564
1565 static void kvm_s390_crypto_init(struct kvm *kvm)
1566 {
1567         if (!test_kvm_facility(kvm, 76))
1568                 return;
1569
1570         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1571         kvm_s390_set_crycb_format(kvm);
1572
1573         /* Enable AES/DEA protected key functions by default */
1574         kvm->arch.crypto.aes_kw = 1;
1575         kvm->arch.crypto.dea_kw = 1;
1576         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1577                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1578         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1579                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1580 }
1581
1582 static void sca_dispose(struct kvm *kvm)
1583 {
1584         if (kvm->arch.use_esca)
1585                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1586         else
1587                 free_page((unsigned long)(kvm->arch.sca));
1588         kvm->arch.sca = NULL;
1589 }
1590
1591 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1592 {
1593         gfp_t alloc_flags = GFP_KERNEL;
1594         int i, rc;
1595         char debug_name[16];
1596         static unsigned long sca_offset;
1597
1598         rc = -EINVAL;
1599 #ifdef CONFIG_KVM_S390_UCONTROL
1600         if (type & ~KVM_VM_S390_UCONTROL)
1601                 goto out_err;
1602         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1603                 goto out_err;
1604 #else
1605         if (type)
1606                 goto out_err;
1607 #endif
1608
1609         rc = s390_enable_sie();
1610         if (rc)
1611                 goto out_err;
1612
1613         rc = -ENOMEM;
1614
1615         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1616
1617         kvm->arch.use_esca = 0; /* start with basic SCA */
1618         if (!sclp.has_64bscao)
1619                 alloc_flags |= GFP_DMA;
1620         rwlock_init(&kvm->arch.sca_lock);
1621         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1622         if (!kvm->arch.sca)
1623                 goto out_err;
1624         spin_lock(&kvm_lock);
1625         sca_offset += 16;
1626         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1627                 sca_offset = 0;
1628         kvm->arch.sca = (struct bsca_block *)
1629                         ((char *) kvm->arch.sca + sca_offset);
1630         spin_unlock(&kvm_lock);
1631
1632         sprintf(debug_name, "kvm-%u", current->pid);
1633
1634         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1635         if (!kvm->arch.dbf)
1636                 goto out_err;
1637
1638         kvm->arch.sie_page2 =
1639              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1640         if (!kvm->arch.sie_page2)
1641                 goto out_err;
1642
1643         /* Populate the facility mask initially. */
1644         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1645                sizeof(S390_lowcore.stfle_fac_list));
1646         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1647                 if (i < kvm_s390_fac_list_mask_size())
1648                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1649                 else
1650                         kvm->arch.model.fac_mask[i] = 0UL;
1651         }
1652
1653         /* Populate the facility list initially. */
1654         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1655         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1656                S390_ARCH_FAC_LIST_SIZE_BYTE);
1657
1658         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1659         set_kvm_facility(kvm->arch.model.fac_list, 74);
1660
1661         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1662         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1663
1664         kvm_s390_crypto_init(kvm);
1665
1666         mutex_init(&kvm->arch.float_int.ais_lock);
1667         kvm->arch.float_int.simm = 0;
1668         kvm->arch.float_int.nimm = 0;
1669         kvm->arch.float_int.ais_enabled = 0;
1670         spin_lock_init(&kvm->arch.float_int.lock);
1671         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1672                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1673         init_waitqueue_head(&kvm->arch.ipte_wq);
1674         mutex_init(&kvm->arch.ipte_mutex);
1675
1676         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1677         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1678
1679         if (type & KVM_VM_S390_UCONTROL) {
1680                 kvm->arch.gmap = NULL;
1681                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1682         } else {
1683                 if (sclp.hamax == U64_MAX)
1684                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1685                 else
1686                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1687                                                     sclp.hamax + 1);
1688                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1689                 if (!kvm->arch.gmap)
1690                         goto out_err;
1691                 kvm->arch.gmap->private = kvm;
1692                 kvm->arch.gmap->pfault_enabled = 0;
1693         }
1694
1695         kvm->arch.css_support = 0;
1696         kvm->arch.use_irqchip = 0;
1697         kvm->arch.epoch = 0;
1698
1699         spin_lock_init(&kvm->arch.start_stop_lock);
1700         kvm_s390_vsie_init(kvm);
1701         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1702
1703         return 0;
1704 out_err:
1705         free_page((unsigned long)kvm->arch.sie_page2);
1706         debug_unregister(kvm->arch.dbf);
1707         sca_dispose(kvm);
1708         KVM_EVENT(3, "creation of vm failed: %d", rc);
1709         return rc;
1710 }
1711
/*
 * Always answers false. The companion kvm_arch_create_vcpu_debugfs() in this
 * file creates nothing either.
 */
bool kvm_arch_has_vcpu_debugfs(void)
{
	const bool has_vcpu_debugfs = false;

	return has_vcpu_debugfs;
}
1716
/*
 * Nothing is created for @vcpu here; the function only reports success (0).
 */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	return rc;
}
1721
1722 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1723 {
1724         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1725         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1726         kvm_s390_clear_local_irqs(vcpu);
1727         kvm_clear_async_pf_completion_queue(vcpu);
1728         if (!kvm_is_ucontrol(vcpu->kvm))
1729                 sca_del_vcpu(vcpu);
1730
1731         if (kvm_is_ucontrol(vcpu->kvm))
1732                 gmap_remove(vcpu->arch.gmap);
1733
1734         if (vcpu->kvm->arch.use_cmma)
1735                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1736         free_page((unsigned long)(vcpu->arch.sie_block));
1737
1738         kvm_vcpu_uninit(vcpu);
1739         kmem_cache_free(kvm_vcpu_cache, vcpu);
1740 }
1741
1742 static void kvm_free_vcpus(struct kvm *kvm)
1743 {
1744         unsigned int i;
1745         struct kvm_vcpu *vcpu;
1746
1747         kvm_for_each_vcpu(i, vcpu, kvm)
1748                 kvm_arch_vcpu_destroy(vcpu);
1749
1750         mutex_lock(&kvm->lock);
1751         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1752                 kvm->vcpus[i] = NULL;
1753
1754         atomic_set(&kvm->online_vcpus, 0);
1755         mutex_unlock(&kvm->lock);
1756 }
1757
1758 void kvm_arch_destroy_vm(struct kvm *kvm)
1759 {
1760         kvm_free_vcpus(kvm);
1761         sca_dispose(kvm);
1762         debug_unregister(kvm->arch.dbf);
1763         free_page((unsigned long)kvm->arch.sie_page2);
1764         if (!kvm_is_ucontrol(kvm))
1765                 gmap_remove(kvm->arch.gmap);
1766         kvm_s390_destroy_adapters(kvm);
1767         kvm_s390_clear_float_irqs(kvm);
1768         kvm_s390_vsie_destroy(kvm);
1769         if (kvm->arch.migration_state) {
1770                 vfree(kvm->arch.migration_state->pgste_bitmap);
1771                 kfree(kvm->arch.migration_state);
1772         }
1773         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1774 }
1775
1776 /* Section: vcpu related */
1777 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1778 {
1779         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1780         if (!vcpu->arch.gmap)
1781                 return -ENOMEM;
1782         vcpu->arch.gmap->private = vcpu->kvm;
1783
1784         return 0;
1785 }
1786
1787 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1788 {
1789         if (!kvm_s390_use_sca_entries())
1790                 return;
1791         read_lock(&vcpu->kvm->arch.sca_lock);
1792         if (vcpu->kvm->arch.use_esca) {
1793                 struct esca_block *sca = vcpu->kvm->arch.sca;
1794
1795                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1796                 sca->cpu[vcpu->vcpu_id].sda = 0;
1797         } else {
1798                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1799
1800                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1801                 sca->cpu[vcpu->vcpu_id].sda = 0;
1802         }
1803         read_unlock(&vcpu->kvm->arch.sca_lock);
1804 }
1805
1806 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1807 {
1808         if (!kvm_s390_use_sca_entries()) {
1809                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1810
1811                 /* we still need the basic sca for the ipte control */
1812                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1813                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1814         }
1815         read_lock(&vcpu->kvm->arch.sca_lock);
1816         if (vcpu->kvm->arch.use_esca) {
1817                 struct esca_block *sca = vcpu->kvm->arch.sca;
1818
1819                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1820                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1821                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1822                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
1823                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1824         } else {
1825                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1826
1827                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1828                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1829                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1830                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1831         }
1832         read_unlock(&vcpu->kvm->arch.sca_lock);
1833 }
1834
1835 /* Basic SCA to Extended SCA data copy routines */
1836 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1837 {
1838         d->sda = s->sda;
1839         d->sigp_ctrl.c = s->sigp_ctrl.c;
1840         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1841 }
1842
1843 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1844 {
1845         int i;
1846
1847         d->ipte_control = s->ipte_control;
1848         d->mcn[0] = s->mcn;
1849         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1850                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1851 }
1852
1853 static int sca_switch_to_extended(struct kvm *kvm)
1854 {
1855         struct bsca_block *old_sca = kvm->arch.sca;
1856         struct esca_block *new_sca;
1857         struct kvm_vcpu *vcpu;
1858         unsigned int vcpu_idx;
1859         u32 scaol, scaoh;
1860
1861         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1862         if (!new_sca)
1863                 return -ENOMEM;
1864
1865         scaoh = (u32)((u64)(new_sca) >> 32);
1866         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1867
1868         kvm_s390_vcpu_block_all(kvm);
1869         write_lock(&kvm->arch.sca_lock);
1870
1871         sca_copy_b_to_e(new_sca, old_sca);
1872
1873         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1874                 vcpu->arch.sie_block->scaoh = scaoh;
1875                 vcpu->arch.sie_block->scaol = scaol;
1876                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
1877         }
1878         kvm->arch.sca = new_sca;
1879         kvm->arch.use_esca = 1;
1880
1881         write_unlock(&kvm->arch.sca_lock);
1882         kvm_s390_vcpu_unblock_all(kvm);
1883
1884         free_page((unsigned long)old_sca);
1885
1886         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1887                  old_sca, kvm->arch.sca);
1888         return 0;
1889 }
1890
1891 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1892 {
1893         int rc;
1894
1895         if (!kvm_s390_use_sca_entries()) {
1896                 if (id < KVM_MAX_VCPUS)
1897                         return true;
1898                 return false;
1899         }
1900         if (id < KVM_S390_BSCA_CPU_SLOTS)
1901                 return true;
1902         if (!sclp.has_esca || !sclp.has_64bscao)
1903                 return false;
1904
1905         mutex_lock(&kvm->lock);
1906         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1907         mutex_unlock(&kvm->lock);
1908
1909         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1910 }
1911
1912 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1913 {
1914         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1915         kvm_clear_async_pf_completion_queue(vcpu);
1916         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1917                                     KVM_SYNC_GPRS |
1918                                     KVM_SYNC_ACRS |
1919                                     KVM_SYNC_CRS |
1920                                     KVM_SYNC_ARCH0 |
1921                                     KVM_SYNC_PFAULT;
1922         kvm_s390_set_prefix(vcpu, 0);
1923         if (test_kvm_facility(vcpu->kvm, 64))
1924                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1925         if (test_kvm_facility(vcpu->kvm, 133))
1926                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
1927         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1928          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1929          */
1930         if (MACHINE_HAS_VX)
1931                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1932         else
1933                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1934
1935         if (kvm_is_ucontrol(vcpu->kvm))
1936                 return __kvm_ucontrol_vcpu_init(vcpu);
1937
1938         return 0;
1939 }
1940
1941 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1942 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1943 {
1944         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1945         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1946         vcpu->arch.cputm_start = get_tod_clock_fast();
1947         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1948 }
1949
1950 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1951 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1952 {
1953         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1954         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1955         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1956         vcpu->arch.cputm_start = 0;
1957         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1958 }
1959
1960 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1961 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1962 {
1963         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1964         vcpu->arch.cputm_enabled = true;
1965         __start_cpu_timer_accounting(vcpu);
1966 }
1967
1968 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1969 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1970 {
1971         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1972         __stop_cpu_timer_accounting(vcpu);
1973         vcpu->arch.cputm_enabled = false;
1974 }
1975
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
1982
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
1989
1990 /* set the cpu timer - may only be called from the VCPU thread itself */
1991 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1992 {
1993         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1994         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1995         if (vcpu->arch.cputm_enabled)
1996                 vcpu->arch.cputm_start = get_tod_clock_fast();
1997         vcpu->arch.sie_block->cputm = cputm;
1998         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1999         preempt_enable();
2000 }
2001
2002 /* update and get the cpu timer - can also be called from other VCPU threads */
2003 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2004 {
2005         unsigned int seq;
2006         __u64 value;
2007
2008         if (unlikely(!vcpu->arch.cputm_enabled))
2009                 return vcpu->arch.sie_block->cputm;
2010
2011         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2012         do {
2013                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2014                 /*
2015                  * If the writer would ever execute a read in the critical
2016                  * section, e.g. in irq context, we have a deadlock.
2017                  */
2018                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2019                 value = vcpu->arch.sie_block->cputm;
2020                 /* if cputm_start is 0, accounting is being started/stopped */
2021                 if (likely(vcpu->arch.cputm_start))
2022                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2023         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2024         preempt_enable();
2025         return value;
2026 }
2027
2028 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2029 {
2030
2031         gmap_enable(vcpu->arch.enabled_gmap);
2032         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2033         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2034                 __start_cpu_timer_accounting(vcpu);
2035         vcpu->cpu = cpu;
2036 }
2037
2038 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2039 {
2040         vcpu->cpu = -1;
2041         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2042                 __stop_cpu_timer_accounting(vcpu);
2043         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2044         vcpu->arch.enabled_gmap = gmap_get_enabled();
2045         gmap_disable(vcpu->arch.enabled_gmap);
2046
2047 }
2048
2049 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2050 {
2051         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2052         vcpu->arch.sie_block->gpsw.mask = 0UL;
2053         vcpu->arch.sie_block->gpsw.addr = 0UL;
2054         kvm_s390_set_prefix(vcpu, 0);
2055         kvm_s390_set_cpu_timer(vcpu, 0);
2056         vcpu->arch.sie_block->ckc       = 0UL;
2057         vcpu->arch.sie_block->todpr     = 0;
2058         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2059         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2060         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2061         /* make sure the new fpc will be lazily loaded */
2062         save_fpu_regs();
2063         current->thread.fpu.fpc = 0;
2064         vcpu->arch.sie_block->gbea = 1;
2065         vcpu->arch.sie_block->pp = 0;
2066         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2067         kvm_clear_async_pf_completion_queue(vcpu);
2068         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2069                 kvm_s390_vcpu_stop(vcpu);
2070         kvm_s390_clear_local_irqs(vcpu);
2071 }
2072
2073 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2074 {
2075         mutex_lock(&vcpu->kvm->lock);
2076         preempt_disable();
2077         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2078         preempt_enable();
2079         mutex_unlock(&vcpu->kvm->lock);
2080         if (!kvm_is_ucontrol(vcpu->kvm)) {
2081                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2082                 sca_add_vcpu(vcpu);
2083         }
2084         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2085                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2086         /* make vcpu_load load the right gmap on the first trigger */
2087         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2088 }
2089
2090 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2091 {
2092         if (!test_kvm_facility(vcpu->kvm, 76))
2093                 return;
2094
2095         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2096
2097         if (vcpu->kvm->arch.crypto.aes_kw)
2098                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2099         if (vcpu->kvm->arch.crypto.dea_kw)
2100                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2101
2102         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2103 }
2104
2105 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2106 {
2107         free_page(vcpu->arch.sie_block->cbrlo);
2108         vcpu->arch.sie_block->cbrlo = 0;
2109 }
2110
2111 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2112 {
2113         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2114         if (!vcpu->arch.sie_block->cbrlo)
2115                 return -ENOMEM;
2116
2117         vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2118         return 0;
2119 }
2120
2121 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2122 {
2123         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2124
2125         vcpu->arch.sie_block->ibc = model->ibc;
2126         if (test_kvm_facility(vcpu->kvm, 7))
2127                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2128 }
2129
2130 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2131 {
2132         int rc = 0;
2133
2134         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2135                                                     CPUSTAT_SM |
2136                                                     CPUSTAT_STOPPED);
2137
2138         if (test_kvm_facility(vcpu->kvm, 78))
2139                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2140         else if (test_kvm_facility(vcpu->kvm, 8))
2141                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2142
2143         kvm_s390_vcpu_setup_model(vcpu);
2144
2145         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2146         if (MACHINE_HAS_ESOP)
2147                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2148         if (test_kvm_facility(vcpu->kvm, 9))
2149                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2150         if (test_kvm_facility(vcpu->kvm, 73))
2151                 vcpu->arch.sie_block->ecb |= ECB_TE;
2152
2153         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2154                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2155         if (test_kvm_facility(vcpu->kvm, 130))
2156                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2157         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2158         if (sclp.has_cei)
2159                 vcpu->arch.sie_block->eca |= ECA_CEI;
2160         if (sclp.has_ib)
2161                 vcpu->arch.sie_block->eca |= ECA_IB;
2162         if (sclp.has_siif)
2163                 vcpu->arch.sie_block->eca |= ECA_SII;
2164         if (sclp.has_sigpif)
2165                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2166         if (test_kvm_facility(vcpu->kvm, 129)) {
2167                 vcpu->arch.sie_block->eca |= ECA_VX;
2168                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2169         }
2170         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2171                                         | SDNXC;
2172         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2173
2174         if (sclp.has_kss)
2175                 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2176         else
2177                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2178
2179         if (vcpu->kvm->arch.use_cmma) {
2180                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2181                 if (rc)
2182                         return rc;
2183         }
2184         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2185         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2186
2187         kvm_s390_vcpu_crypto_setup(vcpu);
2188
2189         return rc;
2190 }
2191
2192 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2193                                       unsigned int id)
2194 {
2195         struct kvm_vcpu *vcpu;
2196         struct sie_page *sie_page;
2197         int rc = -EINVAL;
2198
2199         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2200                 goto out;
2201
2202         rc = -ENOMEM;
2203
2204         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2205         if (!vcpu)
2206                 goto out;
2207
2208         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2209         if (!sie_page)
2210                 goto out_free_cpu;
2211
2212         vcpu->arch.sie_block = &sie_page->sie_block;
2213         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2214
2215         /* the real guest size will always be smaller than msl */
2216         vcpu->arch.sie_block->mso = 0;
2217         vcpu->arch.sie_block->msl = sclp.hamax;
2218
2219         vcpu->arch.sie_block->icpua = id;
2220         spin_lock_init(&vcpu->arch.local_int.lock);
2221         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2222         vcpu->arch.local_int.wq = &vcpu->wq;
2223         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2224         seqcount_init(&vcpu->arch.cputm_seqcount);
2225
2226         rc = kvm_vcpu_init(vcpu, kvm, id);
2227         if (rc)
2228                 goto out_free_sie_block;
2229         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2230                  vcpu->arch.sie_block);
2231         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2232
2233         return vcpu;
2234 out_free_sie_block:
2235         free_page((unsigned long)(vcpu->arch.sie_block));
2236 out_free_cpu:
2237         kmem_cache_free(kvm_vcpu_cache, vcpu);
2238 out:
2239         return ERR_PTR(rc);
2240 }
2241
/* Returns the result of kvm_s390_vcpu_has_irq(vcpu, 0) unchanged. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
2246
2247 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2248 {
2249         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2250         exit_sie(vcpu);
2251 }
2252
2253 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2254 {
2255         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2256 }
2257
2258 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2259 {
2260         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2261         exit_sie(vcpu);
2262 }
2263
2264 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2265 {
2266         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2267 }
2268
2269 /*
2270  * Kick a guest cpu out of SIE and wait until SIE is not running.
2271  * If the CPU is not running (e.g. waiting as idle) the function will
2272  * return immediately. */
2273 void exit_sie(struct kvm_vcpu *vcpu)
2274 {
2275         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2276         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2277                 cpu_relax();
2278 }
2279
/*
 * Kick a guest cpu out of SIE to process a request synchronously: the
 * request @req is queued first, then the vcpu is forced out of SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
2286
2287 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2288                               unsigned long end)
2289 {
2290         struct kvm *kvm = gmap->private;
2291         struct kvm_vcpu *vcpu;
2292         unsigned long prefix;
2293         int i;
2294
2295         if (gmap_is_shadow(gmap))
2296                 return;
2297         if (start >= 1UL << 31)
2298                 /* We are only interested in prefix pages */
2299                 return;
2300         kvm_for_each_vcpu(i, vcpu, kvm) {
2301                 /* match against both prefix pages */
2302                 prefix = kvm_s390_get_prefix(vcpu);
2303                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2304                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2305                                    start, end);
2306                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2307                 }
2308         }
2309 }
2310
/*
 * kvm common code refers to this, but never calls it - hence the BUG()
 * should it ever be reached.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();

	return 0;
}
2317
2318 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2319                                            struct kvm_one_reg *reg)
2320 {
2321         int r = -EINVAL;
2322
2323         switch (reg->id) {
2324         case KVM_REG_S390_TODPR:
2325                 r = put_user(vcpu->arch.sie_block->todpr,
2326                              (u32 __user *)reg->addr);
2327                 break;
2328         case KVM_REG_S390_EPOCHDIFF:
2329                 r = put_user(vcpu->arch.sie_block->epoch,
2330                              (u64 __user *)reg->addr);
2331                 break;
2332         case KVM_REG_S390_CPU_TIMER:
2333                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2334                              (u64 __user *)reg->addr);
2335                 break;
2336         case KVM_REG_S390_CLOCK_COMP:
2337                 r = put_user(vcpu->arch.sie_block->ckc,
2338                              (u64 __user *)reg->addr);
2339                 break;
2340         case KVM_REG_S390_PFTOKEN:
2341                 r = put_user(vcpu->arch.pfault_token,
2342                              (u64 __user *)reg->addr);
2343                 break;
2344         case KVM_REG_S390_PFCOMPARE:
2345                 r = put_user(vcpu->arch.pfault_compare,
2346                              (u64 __user *)reg->addr);
2347                 break;
2348         case KVM_REG_S390_PFSELECT:
2349                 r = put_user(vcpu->arch.pfault_select,
2350                              (u64 __user *)reg->addr);
2351                 break;
2352         case KVM_REG_S390_PP:
2353                 r = put_user(vcpu->arch.sie_block->pp,
2354                              (u64 __user *)reg->addr);
2355                 break;
2356         case KVM_REG_S390_GBEA:
2357                 r = put_user(vcpu->arch.sie_block->gbea,
2358                              (u64 __user *)reg->addr);
2359                 break;
2360         default:
2361                 break;
2362         }
2363
2364         return r;
2365 }
2366
2367 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2368                                            struct kvm_one_reg *reg)
2369 {
2370         int r = -EINVAL;
2371         __u64 val;
2372
2373         switch (reg->id) {
2374         case KVM_REG_S390_TODPR:
2375                 r = get_user(vcpu->arch.sie_block->todpr,
2376                              (u32 __user *)reg->addr);
2377                 break;
2378         case KVM_REG_S390_EPOCHDIFF:
2379                 r = get_user(vcpu->arch.sie_block->epoch,
2380                              (u64 __user *)reg->addr);
2381                 break;
2382         case KVM_REG_S390_CPU_TIMER:
2383                 r = get_user(val, (u64 __user *)reg->addr);
2384                 if (!r)
2385                         kvm_s390_set_cpu_timer(vcpu, val);
2386                 break;
2387         case KVM_REG_S390_CLOCK_COMP:
2388                 r = get_user(vcpu->arch.sie_block->ckc,
2389                              (u64 __user *)reg->addr);
2390                 break;
2391         case KVM_REG_S390_PFTOKEN:
2392                 r = get_user(vcpu->arch.pfault_token,
2393                              (u64 __user *)reg->addr);
2394                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2395                         kvm_clear_async_pf_completion_queue(vcpu);
2396                 break;
2397         case KVM_REG_S390_PFCOMPARE:
2398                 r = get_user(vcpu->arch.pfault_compare,
2399                              (u64 __user *)reg->addr);
2400                 break;
2401         case KVM_REG_S390_PFSELECT:
2402                 r = get_user(vcpu->arch.pfault_select,
2403                              (u64 __user *)reg->addr);
2404                 break;
2405         case KVM_REG_S390_PP:
2406                 r = get_user(vcpu->arch.sie_block->pp,
2407                              (u64 __user *)reg->addr);
2408                 break;
2409         case KVM_REG_S390_GBEA:
2410                 r = get_user(vcpu->arch.sie_block->gbea,
2411                              (u64 __user *)reg->addr);
2412                 break;
2413         default:
2414                 break;
2415         }
2416
2417         return r;
2418 }
2419
/* Perform kvm_s390_vcpu_initial_reset() on the vcpu; always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	kvm_s390_vcpu_initial_reset(vcpu);
	return rc;
}
2425
2426 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2427 {
2428         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2429         return 0;
2430 }
2431
2432 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2433 {
2434         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2435         return 0;
2436 }
2437
2438 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2439                                   struct kvm_sregs *sregs)
2440 {
2441         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2442         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2443         return 0;
2444 }
2445
2446 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2447                                   struct kvm_sregs *sregs)
2448 {
2449         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2450         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2451         return 0;
2452 }
2453
2454 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2455 {
2456         if (test_fp_ctl(fpu->fpc))
2457                 return -EINVAL;
2458         vcpu->run->s.regs.fpc = fpu->fpc;
2459         if (MACHINE_HAS_VX)
2460                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2461                                  (freg_t *) fpu->fprs);
2462         else
2463                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2464         return 0;
2465 }
2466
2467 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2468 {
2469         /* make sure we have the latest values */
2470         save_fpu_regs();
2471         if (MACHINE_HAS_VX)
2472                 convert_vx_to_fp((freg_t *) fpu->fprs,
2473                                  (__vector128 *) vcpu->run->s.regs.vrs);
2474         else
2475                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2476         fpu->fpc = vcpu->run->s.regs.fpc;
2477         return 0;
2478 }
2479
2480 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2481 {
2482         int rc = 0;
2483
2484         if (!is_vcpu_stopped(vcpu))
2485                 rc = -EBUSY;
2486         else {
2487                 vcpu->run->psw_mask = psw.mask;
2488                 vcpu->run->psw_addr = psw.addr;
2489         }
2490         return rc;
2491 }
2492
2493 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2494                                   struct kvm_translation *tr)
2495 {
2496         return -EINVAL; /* not implemented yet */
2497 }
2498
2499 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2500                               KVM_GUESTDBG_USE_HW_BP | \
2501                               KVM_GUESTDBG_ENABLE)
2502
2503 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2504                                         struct kvm_guest_debug *dbg)
2505 {
2506         int rc = 0;
2507
2508         vcpu->guest_debug = 0;
2509         kvm_s390_clear_bp_data(vcpu);
2510
2511         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2512                 return -EINVAL;
2513         if (!sclp.has_gpere)
2514                 return -EINVAL;
2515
2516         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2517                 vcpu->guest_debug = dbg->control;
2518                 /* enforce guest PER */
2519                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2520
2521                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2522                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2523         } else {
2524                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2525                 vcpu->arch.guestdbg.last_bp = 0;
2526         }
2527
2528         if (rc) {
2529                 vcpu->guest_debug = 0;
2530                 kvm_s390_clear_bp_data(vcpu);
2531                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2532         }
2533
2534         return rc;
2535 }
2536
2537 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2538                                     struct kvm_mp_state *mp_state)
2539 {
2540         /* CHECK_STOP and LOAD are not supported yet */
2541         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2542                                        KVM_MP_STATE_OPERATING;
2543 }
2544
2545 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2546                                     struct kvm_mp_state *mp_state)
2547 {
2548         int rc = 0;
2549
2550         /* user space knows about this interface - let it control the state */
2551         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2552
2553         switch (mp_state->mp_state) {
2554         case KVM_MP_STATE_STOPPED:
2555                 kvm_s390_vcpu_stop(vcpu);
2556                 break;
2557         case KVM_MP_STATE_OPERATING:
2558                 kvm_s390_vcpu_start(vcpu);
2559                 break;
2560         case KVM_MP_STATE_LOAD:
2561         case KVM_MP_STATE_CHECK_STOP:
2562                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2563         default:
2564                 rc = -ENXIO;
2565         }
2566
2567         return rc;
2568 }
2569
2570 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2571 {
2572         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2573 }
2574
2575 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2576 {
2577 retry:
2578         kvm_s390_vcpu_request_handled(vcpu);
2579         if (!vcpu->requests)
2580                 return 0;
2581         /*
2582          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2583          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2584          * This ensures that the ipte instruction for this request has
2585          * already finished. We might race against a second unmapper that
2586          * wants to set the blocking bit. Lets just retry the request loop.
2587          */
2588         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2589                 int rc;
2590                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2591                                           kvm_s390_get_prefix(vcpu),
2592                                           PAGE_SIZE * 2, PROT_WRITE);
2593                 if (rc) {
2594                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2595                         return rc;
2596                 }
2597                 goto retry;
2598         }
2599
2600         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2601                 vcpu->arch.sie_block->ihcpu = 0xffff;
2602                 goto retry;
2603         }
2604
2605         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2606                 if (!ibs_enabled(vcpu)) {
2607                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2608                         atomic_or(CPUSTAT_IBS,
2609                                         &vcpu->arch.sie_block->cpuflags);
2610                 }
2611                 goto retry;
2612         }
2613
2614         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2615                 if (ibs_enabled(vcpu)) {
2616                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2617                         atomic_andnot(CPUSTAT_IBS,
2618                                           &vcpu->arch.sie_block->cpuflags);
2619                 }
2620                 goto retry;
2621         }
2622
2623         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2624                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2625                 goto retry;
2626         }
2627
2628         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2629                 /*
2630                  * Disable CMMA virtualization; we will emulate the ESSA
2631                  * instruction manually, in order to provide additional
2632                  * functionalities needed for live migration.
2633                  */
2634                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2635                 goto retry;
2636         }
2637
2638         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2639                 /*
2640                  * Re-enable CMMA virtualization if CMMA is available and
2641                  * was used.
2642                  */
2643                 if ((vcpu->kvm->arch.use_cmma) &&
2644                     (vcpu->kvm->mm->context.use_cmma))
2645                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2646                 goto retry;
2647         }
2648
2649         /* nothing to do, just clear the request */
2650         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2651
2652         return 0;
2653 }
2654
2655 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2656 {
2657         struct kvm_vcpu *vcpu;
2658         int i;
2659
2660         mutex_lock(&kvm->lock);
2661         preempt_disable();
2662         kvm->arch.epoch = tod - get_tod_clock();
2663         kvm_s390_vcpu_block_all(kvm);
2664         kvm_for_each_vcpu(i, vcpu, kvm)
2665                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2666         kvm_s390_vcpu_unblock_all(kvm);
2667         preempt_enable();
2668         mutex_unlock(&kvm->lock);
2669 }
2670
2671 /**
2672  * kvm_arch_fault_in_page - fault-in guest page if necessary
2673  * @vcpu: The corresponding virtual cpu
2674  * @gpa: Guest physical address
2675  * @writable: Whether the page should be writable or not
2676  *
2677  * Make sure that a guest page has been faulted-in on the host.
2678  *
2679  * Return: Zero on success, negative error code otherwise.
2680  */
2681 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2682 {
2683         return gmap_fault(vcpu->arch.gmap, gpa,
2684                           writable ? FAULT_FLAG_WRITE : 0);
2685 }
2686
2687 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2688                                       unsigned long token)
2689 {
2690         struct kvm_s390_interrupt inti;
2691         struct kvm_s390_irq irq;
2692
2693         if (start_token) {
2694                 irq.u.ext.ext_params2 = token;
2695                 irq.type = KVM_S390_INT_PFAULT_INIT;
2696                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2697         } else {
2698                 inti.type = KVM_S390_INT_PFAULT_DONE;
2699                 inti.parm64 = token;
2700                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2701         }
2702 }
2703
2704 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2705                                      struct kvm_async_pf *work)
2706 {
2707         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2708         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2709 }
2710
2711 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2712                                  struct kvm_async_pf *work)
2713 {
2714         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2715         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2716 }
2717
/* Intentionally empty async page fault hook. */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
                               struct kvm_async_pf *work)
{
        /* s390 will always inject the page directly */
        return;
}
2723
2724 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2725 {
2726         /*
2727          * s390 will always inject the page directly,
2728          * but we still want check_async_completion to cleanup
2729          */
2730         return true;
2731 }
2732
2733 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2734 {
2735         hva_t hva;
2736         struct kvm_arch_async_pf arch;
2737         int rc;
2738
2739         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2740                 return 0;
2741         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2742             vcpu->arch.pfault_compare)
2743                 return 0;
2744         if (psw_extint_disabled(vcpu))
2745                 return 0;
2746         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2747                 return 0;
2748         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2749                 return 0;
2750         if (!vcpu->arch.gmap->pfault_enabled)
2751                 return 0;
2752
2753         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2754         hva += current->thread.gmap_addr & ~PAGE_MASK;
2755         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2756                 return 0;
2757
2758         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2759         return rc;
2760 }
2761
2762 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2763 {
2764         int rc, cpuflags;
2765
2766         /*
2767          * On s390 notifications for arriving pages will be delivered directly
2768          * to the guest but the house keeping for completed pfaults is
2769          * handled outside the worker.
2770          */
2771         kvm_check_async_pf_completion(vcpu);
2772
2773         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2774         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2775
2776         if (need_resched())
2777                 schedule();
2778
2779         if (test_cpu_flag(CIF_MCCK_PENDING))
2780                 s390_handle_mcck();
2781
2782         if (!kvm_is_ucontrol(vcpu->kvm)) {
2783                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2784                 if (rc)
2785                         return rc;
2786         }
2787
2788         rc = kvm_s390_handle_requests(vcpu);
2789         if (rc)
2790                 return rc;
2791
2792         if (guestdbg_enabled(vcpu)) {
2793                 kvm_s390_backup_guest_per_regs(vcpu);
2794                 kvm_s390_patch_guest_per_regs(vcpu);
2795         }
2796
2797         vcpu->arch.sie_block->icptcode = 0;
2798         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2799         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2800         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2801
2802         return 0;
2803 }
2804
2805 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2806 {
2807         struct kvm_s390_pgm_info pgm_info = {
2808                 .code = PGM_ADDRESSING,
2809         };
2810         u8 opcode, ilen;
2811         int rc;
2812
2813         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2814         trace_kvm_s390_sie_fault(vcpu);
2815
2816         /*
2817          * We want to inject an addressing exception, which is defined as a
2818          * suppressing or terminating exception. However, since we came here
2819          * by a DAT access exception, the PSW still points to the faulting
2820          * instruction since DAT exceptions are nullifying. So we've got
2821          * to look up the current opcode to get the length of the instruction
2822          * to be able to forward the PSW.
2823          */
2824         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
2825         ilen = insn_length(opcode);
2826         if (rc < 0) {
2827                 return rc;
2828         } else if (rc) {
2829                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2830                  * Forward by arbitrary ilc, injection will take care of
2831                  * nullification if necessary.
2832                  */
2833                 pgm_info = vcpu->arch.pgm;
2834                 ilen = 4;
2835         }
2836         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2837         kvm_s390_forward_psw(vcpu, ilen);
2838         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2839 }
2840
2841 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2842 {
2843         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2844                    vcpu->arch.sie_block->icptcode);
2845         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2846
2847         if (guestdbg_enabled(vcpu))
2848                 kvm_s390_restore_guest_per_regs(vcpu);
2849
2850         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2851         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2852
2853         if (vcpu->arch.sie_block->icptcode > 0) {
2854                 int rc = kvm_handle_sie_intercept(vcpu);
2855
2856                 if (rc != -EOPNOTSUPP)
2857                         return rc;
2858                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2859                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2860                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2861                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2862                 return -EREMOTE;
2863         } else if (exit_reason != -EFAULT) {
2864                 vcpu->stat.exit_null++;
2865                 return 0;
2866         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2867                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2868                 vcpu->run->s390_ucontrol.trans_exc_code =
2869                                                 current->thread.gmap_addr;
2870                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2871                 return -EREMOTE;
2872         } else if (current->thread.gmap_pfault) {
2873                 trace_kvm_s390_major_guest_pfault(vcpu);
2874                 current->thread.gmap_pfault = 0;
2875                 if (kvm_arch_setup_async_pf(vcpu))
2876                         return 0;
2877                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2878         }
2879         return vcpu_post_run_fault_in_sie(vcpu);
2880 }
2881
2882 static int __vcpu_run(struct kvm_vcpu *vcpu)
2883 {
2884         int rc, exit_reason;
2885
2886         /*
2887          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2888          * ning the guest), so that memslots (and other stuff) are protected
2889          */
2890         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2891
2892         do {
2893                 rc = vcpu_pre_run(vcpu);
2894                 if (rc)
2895                         break;
2896
2897                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2898                 /*
2899                  * As PF_VCPU will be used in fault handler, between
2900                  * guest_enter and guest_exit should be no uaccess.
2901                  */
2902                 local_irq_disable();
2903                 guest_enter_irqoff();
2904                 __disable_cpu_timer_accounting(vcpu);
2905                 local_irq_enable();
2906                 exit_reason = sie64a(vcpu->arch.sie_block,
2907                                      vcpu->run->s.regs.gprs);
2908                 local_irq_disable();
2909                 __enable_cpu_timer_accounting(vcpu);
2910                 guest_exit_irqoff();
2911                 local_irq_enable();
2912                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2913
2914                 rc = vcpu_post_run(vcpu, exit_reason);
2915         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2916
2917         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2918         return rc;
2919 }
2920
2921 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2922 {
2923         struct runtime_instr_cb *riccb;
2924         struct gs_cb *gscb;
2925
2926         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2927         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
2928         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2929         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2930         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2931                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2932         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2933                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2934                 /* some control register changes require a tlb flush */
2935                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2936         }
2937         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2938                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2939                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2940                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2941                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2942                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2943         }
2944         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2945                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2946                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2947                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2948                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2949                         kvm_clear_async_pf_completion_queue(vcpu);
2950         }
2951         /*
2952          * If userspace sets the riccb (e.g. after migration) to a valid state,
2953          * we should enable RI here instead of doing the lazy enablement.
2954          */
2955         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2956             test_kvm_facility(vcpu->kvm, 64) &&
2957             riccb->valid &&
2958             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
2959                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
2960                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
2961         }
2962         /*
2963          * If userspace sets the gscb (e.g. after migration) to non-zero,
2964          * we should enable GS here instead of doing the lazy enablement.
2965          */
2966         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
2967             test_kvm_facility(vcpu->kvm, 133) &&
2968             gscb->gssm &&
2969             !vcpu->arch.gs_enabled) {
2970                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
2971                 vcpu->arch.sie_block->ecb |= ECB_GS;
2972                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2973                 vcpu->arch.gs_enabled = 1;
2974         }
2975         save_access_regs(vcpu->arch.host_acrs);
2976         restore_access_regs(vcpu->run->s.regs.acrs);
2977         /* save host (userspace) fprs/vrs */
2978         save_fpu_regs();
2979         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2980         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2981         if (MACHINE_HAS_VX)
2982                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2983         else
2984                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2985         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2986         if (test_fp_ctl(current->thread.fpu.fpc))
2987                 /* User space provided an invalid FPC, let's clear it */
2988                 current->thread.fpu.fpc = 0;
2989         if (MACHINE_HAS_GS) {
2990                 preempt_disable();
2991                 __ctl_set_bit(2, 4);
2992                 if (current->thread.gs_cb) {
2993                         vcpu->arch.host_gscb = current->thread.gs_cb;
2994                         save_gs_cb(vcpu->arch.host_gscb);
2995                 }
2996                 if (vcpu->arch.gs_enabled) {
2997                         current->thread.gs_cb = (struct gs_cb *)
2998                                                 &vcpu->run->s.regs.gscb;
2999                         restore_gs_cb(current->thread.gs_cb);
3000                 }
3001                 preempt_enable();
3002         }
3003
3004         kvm_run->kvm_dirty_regs = 0;
3005 }
3006
3007 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3008 {
3009         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3010         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3011         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3012         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3013         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3014         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3015         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3016         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3017         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3018         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3019         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3020         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3021         save_access_regs(vcpu->run->s.regs.acrs);
3022         restore_access_regs(vcpu->arch.host_acrs);
3023         /* Save guest register state */
3024         save_fpu_regs();
3025         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3026         /* Restore will be done lazily at return */
3027         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3028         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3029         if (MACHINE_HAS_GS) {
3030                 __ctl_set_bit(2, 4);
3031                 if (vcpu->arch.gs_enabled)
3032                         save_gs_cb(current->thread.gs_cb);
3033                 preempt_disable();
3034                 current->thread.gs_cb = vcpu->arch.host_gscb;
3035                 restore_gs_cb(vcpu->arch.host_gscb);
3036                 preempt_enable();
3037                 if (!vcpu->arch.host_gscb)
3038                         __ctl_clear_bit(2, 4);
3039                 vcpu->arch.host_gscb = NULL;
3040         }
3041
3042 }
3043
3044 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3045 {
3046         int rc;
3047         sigset_t sigsaved;
3048
3049         if (kvm_run->immediate_exit)
3050                 return -EINTR;
3051
3052         if (guestdbg_exit_pending(vcpu)) {
3053                 kvm_s390_prepare_debug_exit(vcpu);
3054                 return 0;
3055         }
3056
3057         if (vcpu->sigset_active)
3058                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3059
3060         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3061                 kvm_s390_vcpu_start(vcpu);
3062         } else if (is_vcpu_stopped(vcpu)) {
3063                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3064                                    vcpu->vcpu_id);
3065                 return -EINVAL;
3066         }
3067
3068         sync_regs(vcpu, kvm_run);
3069         enable_cpu_timer_accounting(vcpu);
3070
3071         might_fault();
3072         rc = __vcpu_run(vcpu);
3073
3074         if (signal_pending(current) && !rc) {
3075                 kvm_run->exit_reason = KVM_EXIT_INTR;
3076                 rc = -EINTR;
3077         }
3078
3079         if (guestdbg_exit_pending(vcpu) && !rc)  {
3080                 kvm_s390_prepare_debug_exit(vcpu);
3081                 rc = 0;
3082         }
3083
3084         if (rc == -EREMOTE) {
3085                 /* userspace support is needed, kvm_run has been prepared */
3086                 rc = 0;
3087         }
3088
3089         disable_cpu_timer_accounting(vcpu);
3090         store_regs(vcpu, kvm_run);
3091
3092         if (vcpu->sigset_active)
3093                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3094
3095         vcpu->stat.exit_userspace++;
3096         return rc;
3097 }
3098
3099 /*
3100  * store status at address
3101  * we use have two special cases:
3102  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3103  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3104  */
3105 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3106 {
3107         unsigned char archmode = 1;
3108         freg_t fprs[NUM_FPRS];
3109         unsigned int px;
3110         u64 clkcomp, cputm;
3111         int rc;
3112
3113         px = kvm_s390_get_prefix(vcpu);
3114         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3115                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3116                         return -EFAULT;
3117                 gpa = 0;
3118         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3119                 if (write_guest_real(vcpu, 163, &archmode, 1))
3120                         return -EFAULT;
3121                 gpa = px;
3122         } else
3123                 gpa -= __LC_FPREGS_SAVE_AREA;
3124
3125         /* manually convert vector registers if necessary */
3126         if (MACHINE_HAS_VX) {
3127                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3128                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3129                                      fprs, 128);
3130         } else {
3131                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3132                                      vcpu->run->s.regs.fprs, 128);
3133         }
3134         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3135                               vcpu->run->s.regs.gprs, 128);
3136         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3137                               &vcpu->arch.sie_block->gpsw, 16);
3138         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3139                               &px, 4);
3140         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3141                               &vcpu->run->s.regs.fpc, 4);
3142         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3143                               &vcpu->arch.sie_block->todpr, 4);
3144         cputm = kvm_s390_get_cpu_timer(vcpu);
3145         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3146                               &cputm, 8);
3147         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3148         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3149                               &clkcomp, 8);
3150         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3151                               &vcpu->run->s.regs.acrs, 64);
3152         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3153                               &vcpu->arch.sie_block->gcr, 128);
3154         return rc ? -EFAULT : 0;
3155 }
3156
3157 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3158 {
3159         /*
3160          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3161          * switch in the run ioctl. Let's update our copies before we save
3162          * it into the save area
3163          */
3164         save_fpu_regs();
3165         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3166         save_access_regs(vcpu->run->s.regs.acrs);
3167
3168         return kvm_s390_store_status_unloaded(vcpu, addr);
3169 }
3170
3171 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3172 {
3173         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3174         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3175 }
3176
3177 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3178 {
3179         unsigned int i;
3180         struct kvm_vcpu *vcpu;
3181
3182         kvm_for_each_vcpu(i, vcpu, kvm) {
3183                 __disable_ibs_on_vcpu(vcpu);
3184         }
3185 }
3186
3187 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3188 {
3189         if (!sclp.has_ibs)
3190                 return;
3191         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3192         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3193 }
3194
3195 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3196 {
3197         int i, online_vcpus, started_vcpus = 0;
3198
3199         if (!is_vcpu_stopped(vcpu))
3200                 return;
3201
3202         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3203         /* Only one cpu at a time may enter/leave the STOPPED state. */
3204         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3205         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3206
3207         for (i = 0; i < online_vcpus; i++) {
3208                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3209                         started_vcpus++;
3210         }
3211
3212         if (started_vcpus == 0) {
3213                 /* we're the only active VCPU -> speed it up */
3214                 __enable_ibs_on_vcpu(vcpu);
3215         } else if (started_vcpus == 1) {
3216                 /*
3217                  * As we are starting a second VCPU, we have to disable
3218                  * the IBS facility on all VCPUs to remove potentially
3219                  * oustanding ENABLE requests.
3220                  */
3221                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3222         }
3223
3224         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3225         /*
3226          * Another VCPU might have used IBS while we were offline.
3227          * Let's play safe and flush the VCPU at startup.
3228          */
3229         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3230         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3231         return;
3232 }
3233
3234 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3235 {
3236         int i, online_vcpus, started_vcpus = 0;
3237         struct kvm_vcpu *started_vcpu = NULL;
3238
3239         if (is_vcpu_stopped(vcpu))
3240                 return;
3241
3242         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3243         /* Only one cpu at a time may enter/leave the STOPPED state. */
3244         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3245         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3246
3247         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3248         kvm_s390_clear_stop_irq(vcpu);
3249
3250         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3251         __disable_ibs_on_vcpu(vcpu);
3252
3253         for (i = 0; i < online_vcpus; i++) {
3254                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3255                         started_vcpus++;
3256                         started_vcpu = vcpu->kvm->vcpus[i];
3257                 }
3258         }
3259
3260         if (started_vcpus == 1) {
3261                 /*
3262                  * As we only have one VCPU left, we want to enable the
3263                  * IBS facility for that VCPU to speed it up.
3264                  */
3265                 __enable_ibs_on_vcpu(started_vcpu);
3266         }
3267
3268         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3269         return;
3270 }
3271
3272 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3273                                      struct kvm_enable_cap *cap)
3274 {
3275         int r;
3276
3277         if (cap->flags)
3278                 return -EINVAL;
3279
3280         switch (cap->cap) {
3281         case KVM_CAP_S390_CSS_SUPPORT:
3282                 if (!vcpu->kvm->arch.css_support) {
3283                         vcpu->kvm->arch.css_support = 1;
3284                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3285                         trace_kvm_s390_enable_css(vcpu->kvm);
3286                 }
3287                 r = 0;
3288                 break;
3289         default:
3290                 r = -EINVAL;
3291                 break;
3292         }
3293         return r;
3294 }
3295
3296 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3297                                   struct kvm_s390_mem_op *mop)
3298 {
3299         void __user *uaddr = (void __user *)mop->buf;
3300         void *tmpbuf = NULL;
3301         int r, srcu_idx;
3302         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3303                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3304
3305         if (mop->flags & ~supported_flags)
3306                 return -EINVAL;
3307
3308         if (mop->size > MEM_OP_MAX_SIZE)
3309                 return -E2BIG;
3310
3311         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3312                 tmpbuf = vmalloc(mop->size);
3313                 if (!tmpbuf)
3314                         return -ENOMEM;
3315         }
3316
3317         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3318
3319         switch (mop->op) {
3320         case KVM_S390_MEMOP_LOGICAL_READ:
3321                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3322                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3323                                             mop->size, GACC_FETCH);
3324                         break;
3325                 }
3326                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3327                 if (r == 0) {
3328                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3329                                 r = -EFAULT;
3330                 }
3331                 break;
3332         case KVM_S390_MEMOP_LOGICAL_WRITE:
3333                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3334                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3335                                             mop->size, GACC_STORE);
3336                         break;
3337                 }
3338                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3339                         r = -EFAULT;
3340                         break;
3341                 }
3342                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3343                 break;
3344         default:
3345                 r = -EINVAL;
3346         }
3347
3348         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3349
3350         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3351                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3352
3353         vfree(tmpbuf);
3354         return r;
3355 }
3356
3357 long kvm_arch_vcpu_ioctl(struct file *filp,
3358                          unsigned int ioctl, unsigned long arg)
3359 {
3360         struct kvm_vcpu *vcpu = filp->private_data;
3361         void __user *argp = (void __user *)arg;
3362         int idx;
3363         long r;
3364
3365         switch (ioctl) {
3366         case KVM_S390_IRQ: {
3367                 struct kvm_s390_irq s390irq;
3368
3369                 r = -EFAULT;
3370                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3371                         break;
3372                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3373                 break;
3374         }
3375         case KVM_S390_INTERRUPT: {
3376                 struct kvm_s390_interrupt s390int;
3377                 struct kvm_s390_irq s390irq;
3378
3379                 r = -EFAULT;
3380                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3381                         break;
3382                 if (s390int_to_s390irq(&s390int, &s390irq))
3383                         return -EINVAL;
3384                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3385                 break;
3386         }
3387         case KVM_S390_STORE_STATUS:
3388                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3389                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3390                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3391                 break;
3392         case KVM_S390_SET_INITIAL_PSW: {
3393                 psw_t psw;
3394
3395                 r = -EFAULT;
3396                 if (copy_from_user(&psw, argp, sizeof(psw)))
3397                         break;
3398                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3399                 break;
3400         }
3401         case KVM_S390_INITIAL_RESET:
3402                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3403                 break;
3404         case KVM_SET_ONE_REG:
3405         case KVM_GET_ONE_REG: {
3406                 struct kvm_one_reg reg;
3407                 r = -EFAULT;
3408                 if (copy_from_user(&reg, argp, sizeof(reg)))
3409                         break;
3410                 if (ioctl == KVM_SET_ONE_REG)
3411                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3412                 else
3413                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3414                 break;
3415         }
3416 #ifdef CONFIG_KVM_S390_UCONTROL
3417         case KVM_S390_UCAS_MAP: {
3418                 struct kvm_s390_ucas_mapping ucasmap;
3419
3420                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3421                         r = -EFAULT;
3422                         break;
3423                 }
3424
3425                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3426                         r = -EINVAL;
3427                         break;
3428                 }
3429
3430                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3431                                      ucasmap.vcpu_addr, ucasmap.length);
3432                 break;
3433         }
3434         case KVM_S390_UCAS_UNMAP: {
3435                 struct kvm_s390_ucas_mapping ucasmap;
3436
3437                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3438                         r = -EFAULT;
3439                         break;
3440                 }
3441
3442                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3443                         r = -EINVAL;
3444                         break;
3445                 }
3446
3447                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3448                         ucasmap.length);
3449                 break;
3450         }
3451 #endif
3452         case KVM_S390_VCPU_FAULT: {
3453                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3454                 break;
3455         }
3456         case KVM_ENABLE_CAP:
3457         {
3458                 struct kvm_enable_cap cap;
3459                 r = -EFAULT;
3460                 if (copy_from_user(&cap, argp, sizeof(cap)))
3461                         break;
3462                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3463                 break;
3464         }
3465         case KVM_S390_MEM_OP: {
3466                 struct kvm_s390_mem_op mem_op;
3467
3468                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3469                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3470                 else
3471                         r = -EFAULT;
3472                 break;
3473         }
3474         case KVM_S390_SET_IRQ_STATE: {
3475                 struct kvm_s390_irq_state irq_state;
3476
3477                 r = -EFAULT;
3478                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3479                         break;
3480                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3481                     irq_state.len == 0 ||
3482                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3483                         r = -EINVAL;
3484                         break;
3485                 }
3486                 r = kvm_s390_set_irq_state(vcpu,
3487                                            (void __user *) irq_state.buf,
3488                                            irq_state.len);
3489                 break;
3490         }
3491         case KVM_S390_GET_IRQ_STATE: {
3492                 struct kvm_s390_irq_state irq_state;
3493
3494                 r = -EFAULT;
3495                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3496                         break;
3497                 if (irq_state.len == 0) {
3498                         r = -EINVAL;
3499                         break;
3500                 }
3501                 r = kvm_s390_get_irq_state(vcpu,
3502                                            (__u8 __user *)  irq_state.buf,
3503                                            irq_state.len);
3504                 break;
3505         }
3506         default:
3507                 r = -ENOTTY;
3508         }
3509         return r;
3510 }
3511
3512 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3513 {
3514 #ifdef CONFIG_KVM_S390_UCONTROL
3515         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3516                  && (kvm_is_ucontrol(vcpu->kvm))) {
3517                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3518                 get_page(vmf->page);
3519                 return 0;
3520         }
3521 #endif
3522         return VM_FAULT_SIGBUS;
3523 }
3524
/*
 * Arch hook for memslot creation.  This implementation does nothing with
 * its arguments and always reports success (0).
 */
int kvm_arch_create_memslot(struct kvm *kvm,
			    struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
3530
3531 /* Section: memory related */
3532 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3533                                    struct kvm_memory_slot *memslot,
3534                                    const struct kvm_userspace_memory_region *mem,
3535                                    enum kvm_mr_change change)
3536 {
3537         /* A few sanity checks. We can have memory slots which have to be
3538            located/ended at a segment boundary (1MB). The memory in userland is
3539            ok to be fragmented into various different vmas. It is okay to mmap()
3540            and munmap() stuff in this slot after doing this call at any time */
3541
3542         if (mem->userspace_addr & 0xffffful)
3543                 return -EINVAL;
3544
3545         if (mem->memory_size & 0xffffful)
3546                 return -EINVAL;
3547
3548         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3549                 return -EINVAL;
3550
3551         return 0;
3552 }
3553
3554 void kvm_arch_commit_memory_region(struct kvm *kvm,
3555                                 const struct kvm_userspace_memory_region *mem,
3556                                 const struct kvm_memory_slot *old,
3557                                 const struct kvm_memory_slot *new,
3558                                 enum kvm_mr_change change)
3559 {
3560         int rc;
3561
3562         /* If the basics of the memslot do not change, we do not want
3563          * to update the gmap. Every update causes several unnecessary
3564          * segment translation exceptions. This is usually handled just
3565          * fine by the normal fault handler + gmap, but it will also
3566          * cause faults on the prefix page of running guest CPUs.
3567          */
3568         if (old->userspace_addr == mem->userspace_addr &&
3569             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3570             old->npages * PAGE_SIZE == mem->memory_size)
3571                 return;
3572
3573         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3574                 mem->guest_phys_addr, mem->memory_size);
3575         if (rc)
3576                 pr_warn("failed to commit memory region\n");
3577         return;
3578 }
3579
3580 static inline unsigned long nonhyp_mask(int i)
3581 {
3582         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3583
3584         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3585 }
3586
3587 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3588 {
3589         vcpu->valid_wakeup = false;
3590 }
3591
3592 static int __init kvm_s390_init(void)
3593 {
3594         int i;
3595
3596         if (!sclp.has_sief2) {
3597                 pr_info("SIE not available\n");
3598                 return -ENODEV;
3599         }
3600
3601         for (i = 0; i < 16; i++)
3602                 kvm_s390_fac_list_mask[i] |=
3603                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3604
3605         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3606 }
3607
3608 static void __exit kvm_s390_exit(void)
3609 {
3610         kvm_exit();
3611 }
3612
/* Register kvm_s390_init()/kvm_s390_exit() as the module's init and exit hooks. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");