1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33
34 #include <asm/asm-offsets.h>
35 #include <asm/lowcore.h>
36 #include <asm/stp.h>
37 #include <asm/pgtable.h>
38 #include <asm/gmap.h>
39 #include <asm/nmi.h>
40 #include <asm/switch_to.h>
41 #include <asm/isc.h>
42 #include <asm/sclp.h>
43 #include <asm/cpacf.h>
44 #include <asm/timex.h>
45 #include "kvm-s390.h"
46 #include "gaccess.h"
47
48 #define KMSG_COMPONENT "kvm-s390"
49 #undef pr_fmt
50 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
51
52 #define CREATE_TRACE_POINTS
53 #include "trace.h"
54 #include "trace-s390.h"
55
56 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
57 #define LOCAL_IRQS 32
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59                            (KVM_MAX_VCPUS + LOCAL_IRQS))
60
61 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
62
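/* Per-VCPU counters exported via debugfs (<debugfs>/kvm/). */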
63 struct kvm_stats_debugfs_item debugfs_entries[] = {
64         { "userspace_handled", VCPU_STAT(exit_userspace) },
65         { "exit_null", VCPU_STAT(exit_null) },
66         { "exit_validity", VCPU_STAT(exit_validity) },
67         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
68         { "exit_external_request", VCPU_STAT(exit_external_request) },
69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70         { "exit_instruction", VCPU_STAT(exit_instruction) },
71         { "exit_pei", VCPU_STAT(exit_pei) },
72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
79         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
81         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
82         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
83         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
84         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
85         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
86         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
87         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
88         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
89         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
90         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
91         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
92         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
93         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
94         { "instruction_spx", VCPU_STAT(instruction_spx) },
95         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
96         { "instruction_stap", VCPU_STAT(instruction_stap) },
97         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
98         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
99         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
100         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
101         { "instruction_essa", VCPU_STAT(instruction_essa) },
102         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
103         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
104         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
105         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
106         { "instruction_sie", VCPU_STAT(instruction_sie) },
107         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
108         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
109         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
110         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
111         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
112         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
113         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
114         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
115         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
116         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
117         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
118         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
119         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
120         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
121         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
122         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
123         { "diagnose_10", VCPU_STAT(diagnose_10) },
124         { "diagnose_44", VCPU_STAT(diagnose_44) },
125         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
126         { "diagnose_258", VCPU_STAT(diagnose_258) },
127         { "diagnose_308", VCPU_STAT(diagnose_308) },
128         { "diagnose_500", VCPU_STAT(diagnose_500) },
129         { NULL }
130 };
131
132 /* allow nested virtualization in KVM (if enabled by user space) */
133 static int nested;
134 module_param(nested, int, S_IRUGO);
135 MODULE_PARM_DESC(nested, "Nested virtualization support");
136
137 /* upper facilities limit for kvm */
138 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
139
140 unsigned long kvm_s390_fac_list_mask_size(void)
141 {
142         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
143         return ARRAY_SIZE(kvm_s390_fac_list_mask);
144 }
145
146 /* available cpu features supported by kvm */
147 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
148 /* available subfunctions indicated via query / "test bit" */
149 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
150
151 static struct gmap_notifier gmap_notifier;
152 static struct gmap_notifier vsie_gmap_notifier;
153 debug_info_t *kvm_s390_dbf;
154
155 /* Section: not file related */
156 int kvm_arch_hardware_enable(void)
157 {
158         /* every s390 is virtualization enabled ;-) */
159         return 0;
160 }
161
162 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
163                               unsigned long end);
164
165 /*
166  * This callback is executed during stop_machine(). All CPUs are therefore
167  * temporarily stopped. In order not to change guest behavior, we have to
168  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
169  * so a CPU won't be stopped while calculating with the epoch.
170  */
171 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
172                           void *v)
173 {
174         struct kvm *kvm;
175         struct kvm_vcpu *vcpu;
176         int i;
177         unsigned long long *delta = v;
178
179         list_for_each_entry(kvm, &vm_list, vm_list) {
180                 kvm->arch.epoch -= *delta;
181                 kvm_for_each_vcpu(i, vcpu, kvm) {
182                         vcpu->arch.sie_block->epoch -= *delta;
183                         if (vcpu->arch.cputm_enabled)
184                                 vcpu->arch.cputm_start += *delta;
185                         if (vcpu->arch.vsie_block)
186                                 vcpu->arch.vsie_block->epoch -= *delta;
187                 }
188         }
189         return NOTIFY_OK;
190 }
191
192 static struct notifier_block kvm_clock_notifier = {
193         .notifier_call = kvm_clock_sync,
194 };
195
196 int kvm_arch_hardware_setup(void)
197 {
198         gmap_notifier.notifier_call = kvm_gmap_notifier;
199         gmap_register_pte_notifier(&gmap_notifier);
200         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
201         gmap_register_pte_notifier(&vsie_gmap_notifier);
202         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
203                                        &kvm_clock_notifier);
204         return 0;
205 }
206
207 void kvm_arch_hardware_unsetup(void)
208 {
209         gmap_unregister_pte_notifier(&gmap_notifier);
210         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
211         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
212                                          &kvm_clock_notifier);
213 }
214
215 static void allow_cpu_feat(unsigned long nr)
216 {
217         set_bit_inv(nr, kvm_s390_available_cpu_feat);
218 }
219
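/*
 * Execute PERFORM LOCKED OPERATION with the "test bit" set in the
 * function code to check whether function code @nr is available.
 * A condition code of 0 means the function is installed.
 */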
220 static inline int plo_test_bit(unsigned char nr)
221 {
222         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
223         int cc;
224
225         asm volatile(
226                 /* Parameter registers are ignored for "test bit" */
227                 "       plo     0,0,0,0(0)\n"
228                 "       ipm     %0\n"
229                 "       srl     %0,28\n"
230                 : "=d" (cc)
231                 : "d" (r0)
232                 : "cc");
233         return cc == 0;
234 }
235
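/*
 * Probe the host for PLO subfunctions, TOD-clock steering, the CPACF (MSA)
 * query masks and the SIE features reported by SCLP, and record everything
 * that may be offered to the guest CPU model.
 */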
236 static void kvm_s390_cpu_feat_init(void)
237 {
238         int i;
239
240         for (i = 0; i < 256; ++i) {
241                 if (plo_test_bit(i))
242                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
243         }
244
245         if (test_facility(28)) /* TOD-clock steering */
246                 ptff(kvm_s390_available_subfunc.ptff,
247                      sizeof(kvm_s390_available_subfunc.ptff),
248                      PTFF_QAF);
249
250         if (test_facility(17)) { /* MSA */
251                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
252                               kvm_s390_available_subfunc.kmac);
253                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
254                               kvm_s390_available_subfunc.kmc);
255                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
256                               kvm_s390_available_subfunc.km);
257                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
258                               kvm_s390_available_subfunc.kimd);
259                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
260                               kvm_s390_available_subfunc.klmd);
261         }
262         if (test_facility(76)) /* MSA3 */
263                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
264                               kvm_s390_available_subfunc.pckmo);
265         if (test_facility(77)) { /* MSA4 */
266                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
267                               kvm_s390_available_subfunc.kmctr);
268                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
269                               kvm_s390_available_subfunc.kmf);
270                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
271                               kvm_s390_available_subfunc.kmo);
272                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
273                               kvm_s390_available_subfunc.pcc);
274         }
275         if (test_facility(57)) /* MSA5 */
276                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
277                               kvm_s390_available_subfunc.ppno);
278
279         if (MACHINE_HAS_ESOP)
280                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
281         /*
282          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
283          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
284          */
285         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
286             !test_facility(3) || !nested)
287                 return;
288         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
289         if (sclp.has_64bscao)
290                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
291         if (sclp.has_siif)
292                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
293         if (sclp.has_gpere)
294                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
295         if (sclp.has_gsls)
296                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
297         if (sclp.has_ib)
298                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
299         if (sclp.has_cei)
300                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
301         if (sclp.has_ibs)
302                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
303         /*
304          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
305          * all skey handling functions read/set the skey from the PGSTE
306          * instead of the real storage key.
307          *
308          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
309          * pages being detected as preserved although they are resident.
310          *
311          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
312          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
313          *
314          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
315          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
316          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
317          *
318          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
319          * cannot easily shadow the SCA because of the ipte lock.
320          */
321 }
322
323 int kvm_arch_init(void *opaque)
324 {
325         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
326         if (!kvm_s390_dbf)
327                 return -ENOMEM;
328
329         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
330                 debug_unregister(kvm_s390_dbf);
331                 return -ENOMEM;
332         }
333
334         kvm_s390_cpu_feat_init();
335
336         /* Register floating interrupt controller interface. */
337         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
338 }
339
340 void kvm_arch_exit(void)
341 {
342         debug_unregister(kvm_s390_dbf);
343 }
344
345 /* Section: device related */
346 long kvm_arch_dev_ioctl(struct file *filp,
347                         unsigned int ioctl, unsigned long arg)
348 {
349         if (ioctl == KVM_S390_ENABLE_SIE)
350                 return s390_enable_sie();
351         return -EINVAL;
352 }
353
354 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
355 {
356         int r;
357
358         switch (ext) {
359         case KVM_CAP_S390_PSW:
360         case KVM_CAP_S390_GMAP:
361         case KVM_CAP_SYNC_MMU:
362 #ifdef CONFIG_KVM_S390_UCONTROL
363         case KVM_CAP_S390_UCONTROL:
364 #endif
365         case KVM_CAP_ASYNC_PF:
366         case KVM_CAP_SYNC_REGS:
367         case KVM_CAP_ONE_REG:
368         case KVM_CAP_ENABLE_CAP:
369         case KVM_CAP_S390_CSS_SUPPORT:
370         case KVM_CAP_IOEVENTFD:
371         case KVM_CAP_DEVICE_CTRL:
372         case KVM_CAP_ENABLE_CAP_VM:
373         case KVM_CAP_S390_IRQCHIP:
374         case KVM_CAP_VM_ATTRIBUTES:
375         case KVM_CAP_MP_STATE:
376         case KVM_CAP_IMMEDIATE_EXIT:
377         case KVM_CAP_S390_INJECT_IRQ:
378         case KVM_CAP_S390_USER_SIGP:
379         case KVM_CAP_S390_USER_STSI:
380         case KVM_CAP_S390_SKEYS:
381         case KVM_CAP_S390_IRQ_STATE:
382         case KVM_CAP_S390_USER_INSTR0:
383                 r = 1;
384                 break;
385         case KVM_CAP_S390_MEM_OP:
386                 r = MEM_OP_MAX_SIZE;
387                 break;
388         case KVM_CAP_NR_VCPUS:
389         case KVM_CAP_MAX_VCPUS:
390                 r = KVM_S390_BSCA_CPU_SLOTS;
391                 if (!kvm_s390_use_sca_entries())
392                         r = KVM_MAX_VCPUS;
393                 else if (sclp.has_esca && sclp.has_64bscao)
394                         r = KVM_S390_ESCA_CPU_SLOTS;
395                 break;
396         case KVM_CAP_NR_MEMSLOTS:
397                 r = KVM_USER_MEM_SLOTS;
398                 break;
399         case KVM_CAP_S390_COW:
400                 r = MACHINE_HAS_ESOP;
401                 break;
402         case KVM_CAP_S390_VECTOR_REGISTERS:
403                 r = MACHINE_HAS_VX;
404                 break;
405         case KVM_CAP_S390_RI:
406                 r = test_facility(64);
407                 break;
408         default:
409                 r = 0;
410         }
411         return r;
412 }
413
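/*
 * Walk all pages of the memslot and transfer the per-page dirty state
 * from the host page tables into KVM's dirty bitmap.
 */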
414 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
415                                         struct kvm_memory_slot *memslot)
416 {
417         gfn_t cur_gfn, last_gfn;
418         unsigned long address;
419         struct gmap *gmap = kvm->arch.gmap;
420
421         /* Loop over all guest pages */
422         last_gfn = memslot->base_gfn + memslot->npages;
423         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
424                 address = gfn_to_hva_memslot(memslot, cur_gfn);
425
426                 if (test_and_clear_guest_dirty(gmap->mm, address))
427                         mark_page_dirty(kvm, cur_gfn);
428                 if (fatal_signal_pending(current))
429                         return;
430                 cond_resched();
431         }
432 }
433
434 /* Section: vm related */
435 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
436
437 /*
438  * Get (and clear) the dirty memory log for a memory slot.
439  */
440 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
441                                struct kvm_dirty_log *log)
442 {
443         int r;
444         unsigned long n;
445         struct kvm_memslots *slots;
446         struct kvm_memory_slot *memslot;
447         int is_dirty = 0;
448
449         if (kvm_is_ucontrol(kvm))
450                 return -EINVAL;
451
452         mutex_lock(&kvm->slots_lock);
453
454         r = -EINVAL;
455         if (log->slot >= KVM_USER_MEM_SLOTS)
456                 goto out;
457
458         slots = kvm_memslots(kvm);
459         memslot = id_to_memslot(slots, log->slot);
460         r = -ENOENT;
461         if (!memslot->dirty_bitmap)
462                 goto out;
463
464         kvm_s390_sync_dirty_log(kvm, memslot);
465         r = kvm_get_dirty_log(kvm, log, &is_dirty);
466         if (r)
467                 goto out;
468
469         /* Clear the dirty log */
470         if (is_dirty) {
471                 n = kvm_dirty_bitmap_bytes(memslot);
472                 memset(memslot->dirty_bitmap, 0, n);
473         }
474         r = 0;
475 out:
476         mutex_unlock(&kvm->slots_lock);
477         return r;
478 }
479
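/* Ask all VCPUs to (re)enable interception of operation exceptions. */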
480 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
481 {
482         unsigned int i;
483         struct kvm_vcpu *vcpu;
484
485         kvm_for_each_vcpu(i, vcpu, kvm) {
486                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
487         }
488 }
489
490 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
491 {
492         int r;
493
494         if (cap->flags)
495                 return -EINVAL;
496
497         switch (cap->cap) {
498         case KVM_CAP_S390_IRQCHIP:
499                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
500                 kvm->arch.use_irqchip = 1;
501                 r = 0;
502                 break;
503         case KVM_CAP_S390_USER_SIGP:
504                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
505                 kvm->arch.user_sigp = 1;
506                 r = 0;
507                 break;
508         case KVM_CAP_S390_VECTOR_REGISTERS:
509                 mutex_lock(&kvm->lock);
510                 if (kvm->created_vcpus) {
511                         r = -EBUSY;
512                 } else if (MACHINE_HAS_VX) {
513                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
514                         set_kvm_facility(kvm->arch.model.fac_list, 129);
515                         if (test_facility(134)) {
516                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
517                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
518                         }
519                         if (test_facility(135)) {
520                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
521                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
522                         }
523                         r = 0;
524                 } else
525                         r = -EINVAL;
526                 mutex_unlock(&kvm->lock);
527                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
528                          r ? "(not available)" : "(success)");
529                 break;
530         case KVM_CAP_S390_RI:
531                 r = -EINVAL;
532                 mutex_lock(&kvm->lock);
533                 if (kvm->created_vcpus) {
534                         r = -EBUSY;
535                 } else if (test_facility(64)) {
536                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
537                         set_kvm_facility(kvm->arch.model.fac_list, 64);
538                         r = 0;
539                 }
540                 mutex_unlock(&kvm->lock);
541                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
542                          r ? "(not available)" : "(success)");
543                 break;
544         case KVM_CAP_S390_USER_STSI:
545                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
546                 kvm->arch.user_stsi = 1;
547                 r = 0;
548                 break;
549         case KVM_CAP_S390_USER_INSTR0:
550                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
551                 kvm->arch.user_instr0 = 1;
552                 icpt_operexc_on_all_vcpus(kvm);
553                 r = 0;
554                 break;
555         default:
556                 r = -EINVAL;
557                 break;
558         }
559         return r;
560 }
561
562 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
563 {
564         int ret;
565
566         switch (attr->attr) {
567         case KVM_S390_VM_MEM_LIMIT_SIZE:
568                 ret = 0;
569                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
570                          kvm->arch.mem_limit);
571                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
572                         ret = -EFAULT;
573                 break;
574         default:
575                 ret = -ENXIO;
576                 break;
577         }
578         return ret;
579 }
580
581 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
582 {
583         int ret;
584         unsigned int idx;
585         switch (attr->attr) {
586         case KVM_S390_VM_MEM_ENABLE_CMMA:
587                 ret = -ENXIO;
588                 if (!sclp.has_cmma)
589                         break;
590
591                 ret = -EBUSY;
592                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
593                 mutex_lock(&kvm->lock);
594                 if (!kvm->created_vcpus) {
595                         kvm->arch.use_cmma = 1;
596                         ret = 0;
597                 }
598                 mutex_unlock(&kvm->lock);
599                 break;
600         case KVM_S390_VM_MEM_CLR_CMMA:
601                 ret = -ENXIO;
602                 if (!sclp.has_cmma)
603                         break;
604                 ret = -EINVAL;
605                 if (!kvm->arch.use_cmma)
606                         break;
607
608                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
609                 mutex_lock(&kvm->lock);
610                 idx = srcu_read_lock(&kvm->srcu);
611                 s390_reset_cmma(kvm->arch.gmap->mm);
612                 srcu_read_unlock(&kvm->srcu, idx);
613                 mutex_unlock(&kvm->lock);
614                 ret = 0;
615                 break;
616         case KVM_S390_VM_MEM_LIMIT_SIZE: {
617                 unsigned long new_limit;
618
619                 if (kvm_is_ucontrol(kvm))
620                         return -EINVAL;
621
622                 if (get_user(new_limit, (u64 __user *)attr->addr))
623                         return -EFAULT;
624
625                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
626                     new_limit > kvm->arch.mem_limit)
627                         return -E2BIG;
628
629                 if (!new_limit)
630                         return -EINVAL;
631
632                 /* gmap_create takes last usable address */
633                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
634                         new_limit -= 1;
635
636                 ret = -EBUSY;
637                 mutex_lock(&kvm->lock);
638                 if (!kvm->created_vcpus) {
639                         /* gmap_create will round the limit up */
640                         struct gmap *new = gmap_create(current->mm, new_limit);
641
642                         if (!new) {
643                                 ret = -ENOMEM;
644                         } else {
645                                 gmap_remove(kvm->arch.gmap);
646                                 new->private = kvm;
647                                 kvm->arch.gmap = new;
648                                 ret = 0;
649                         }
650                 }
651                 mutex_unlock(&kvm->lock);
652                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
653                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
654                          (void *) kvm->arch.gmap->asce);
655                 break;
656         }
657         default:
658                 ret = -ENXIO;
659                 break;
660         }
661         return ret;
662 }
663
664 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
665
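/*
 * Toggle AES/DEA key wrapping for the VM: generate fresh wrapping key masks
 * in the CRYCB when enabling, clear them when disabling, and kick all VCPUs
 * out of SIE so the new crypto setup is picked up.
 */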
666 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
667 {
668         struct kvm_vcpu *vcpu;
669         int i;
670
671         if (!test_kvm_facility(kvm, 76))
672                 return -EINVAL;
673
674         mutex_lock(&kvm->lock);
675         switch (attr->attr) {
676         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
677                 get_random_bytes(
678                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
679                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
680                 kvm->arch.crypto.aes_kw = 1;
681                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
682                 break;
683         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
684                 get_random_bytes(
685                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
686                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
687                 kvm->arch.crypto.dea_kw = 1;
688                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
689                 break;
690         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
691                 kvm->arch.crypto.aes_kw = 0;
692                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
693                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
694                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
695                 break;
696         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
697                 kvm->arch.crypto.dea_kw = 0;
698                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
699                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
700                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
701                 break;
702         default:
703                 mutex_unlock(&kvm->lock);
704                 return -ENXIO;
705         }
706
707         kvm_for_each_vcpu(i, vcpu, kvm) {
708                 kvm_s390_vcpu_crypto_setup(vcpu);
709                 exit_sie(vcpu);
710         }
711         mutex_unlock(&kvm->lock);
712         return 0;
713 }
714
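/* Only a TOD extension (high word) of 0 is accepted here. */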
715 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
716 {
717         u8 gtod_high;
718
719         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
720                                            sizeof(gtod_high)))
721                 return -EFAULT;
722
723         if (gtod_high != 0)
724                 return -EINVAL;
725         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
726
727         return 0;
728 }
729
730 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
731 {
732         u64 gtod;
733
734         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
735                 return -EFAULT;
736
737         kvm_s390_set_tod_clock(kvm, gtod);
738         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
739         return 0;
740 }
741
742 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
743 {
744         int ret;
745
746         if (attr->flags)
747                 return -EINVAL;
748
749         switch (attr->attr) {
750         case KVM_S390_VM_TOD_HIGH:
751                 ret = kvm_s390_set_tod_high(kvm, attr);
752                 break;
753         case KVM_S390_VM_TOD_LOW:
754                 ret = kvm_s390_set_tod_low(kvm, attr);
755                 break;
756         default:
757                 ret = -ENXIO;
758                 break;
759         }
760         return ret;
761 }
762
763 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
764 {
765         u8 gtod_high = 0;
766
767         if (copy_to_user((void __user *)attr->addr, &gtod_high,
768                                          sizeof(gtod_high)))
769                 return -EFAULT;
770         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
771
772         return 0;
773 }
774
775 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
776 {
777         u64 gtod;
778
779         gtod = kvm_s390_get_tod_clock_fast(kvm);
780         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
781                 return -EFAULT;
782         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
783
784         return 0;
785 }
786
787 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
788 {
789         int ret;
790
791         if (attr->flags)
792                 return -EINVAL;
793
794         switch (attr->attr) {
795         case KVM_S390_VM_TOD_HIGH:
796                 ret = kvm_s390_get_tod_high(kvm, attr);
797                 break;
798         case KVM_S390_VM_TOD_LOW:
799                 ret = kvm_s390_get_tod_low(kvm, attr);
800                 break;
801         default:
802                 ret = -ENXIO;
803                 break;
804         }
805         return ret;
806 }
807
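/*
 * Set the guest CPU model (cpuid, IBC, facility list). The requested IBC
 * is clamped to the [lowest, unblocked] range reported by SCLP.
 */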
808 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
809 {
810         struct kvm_s390_vm_cpu_processor *proc;
811         u16 lowest_ibc, unblocked_ibc;
812         int ret = 0;
813
814         mutex_lock(&kvm->lock);
815         if (kvm->created_vcpus) {
816                 ret = -EBUSY;
817                 goto out;
818         }
819         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
820         if (!proc) {
821                 ret = -ENOMEM;
822                 goto out;
823         }
824         if (!copy_from_user(proc, (void __user *)attr->addr,
825                             sizeof(*proc))) {
826                 kvm->arch.model.cpuid = proc->cpuid;
827                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
828                 unblocked_ibc = sclp.ibc & 0xfff;
829                 if (lowest_ibc && proc->ibc) {
830                         if (proc->ibc > unblocked_ibc)
831                                 kvm->arch.model.ibc = unblocked_ibc;
832                         else if (proc->ibc < lowest_ibc)
833                                 kvm->arch.model.ibc = lowest_ibc;
834                         else
835                                 kvm->arch.model.ibc = proc->ibc;
836                 }
837                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
838                        S390_ARCH_FAC_LIST_SIZE_BYTE);
839                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
840                          kvm->arch.model.ibc,
841                          kvm->arch.model.cpuid);
842                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
843                          kvm->arch.model.fac_list[0],
844                          kvm->arch.model.fac_list[1],
845                          kvm->arch.model.fac_list[2]);
846         } else
847                 ret = -EFAULT;
848         kfree(proc);
849 out:
850         mutex_unlock(&kvm->lock);
851         return ret;
852 }
853
854 static int kvm_s390_set_processor_feat(struct kvm *kvm,
855                                        struct kvm_device_attr *attr)
856 {
857         struct kvm_s390_vm_cpu_feat data;
858         int ret = -EBUSY;
859
860         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
861                 return -EFAULT;
862         if (!bitmap_subset((unsigned long *) data.feat,
863                            kvm_s390_available_cpu_feat,
864                            KVM_S390_VM_CPU_FEAT_NR_BITS))
865                 return -EINVAL;
866
867         mutex_lock(&kvm->lock);
868         if (!atomic_read(&kvm->online_vcpus)) {
869                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
870                             KVM_S390_VM_CPU_FEAT_NR_BITS);
871                 ret = 0;
872         }
873         mutex_unlock(&kvm->lock);
874         return ret;
875 }
876
877 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
878                                           struct kvm_device_attr *attr)
879 {
880         /*
881          * Once supported by kernel + hw, we have to store the subfunctions
882          * in kvm->arch and remember that user space configured them.
883          */
884         return -ENXIO;
885 }
886
887 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
888 {
889         int ret = -ENXIO;
890
891         switch (attr->attr) {
892         case KVM_S390_VM_CPU_PROCESSOR:
893                 ret = kvm_s390_set_processor(kvm, attr);
894                 break;
895         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
896                 ret = kvm_s390_set_processor_feat(kvm, attr);
897                 break;
898         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
899                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
900                 break;
901         }
902         return ret;
903 }
904
905 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
906 {
907         struct kvm_s390_vm_cpu_processor *proc;
908         int ret = 0;
909
910         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
911         if (!proc) {
912                 ret = -ENOMEM;
913                 goto out;
914         }
915         proc->cpuid = kvm->arch.model.cpuid;
916         proc->ibc = kvm->arch.model.ibc;
917         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
918                S390_ARCH_FAC_LIST_SIZE_BYTE);
919         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
920                  kvm->arch.model.ibc,
921                  kvm->arch.model.cpuid);
922         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
923                  kvm->arch.model.fac_list[0],
924                  kvm->arch.model.fac_list[1],
925                  kvm->arch.model.fac_list[2]);
926         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
927                 ret = -EFAULT;
928         kfree(proc);
929 out:
930         return ret;
931 }
932
933 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
934 {
935         struct kvm_s390_vm_cpu_machine *mach;
936         int ret = 0;
937
938         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
939         if (!mach) {
940                 ret = -ENOMEM;
941                 goto out;
942         }
943         get_cpu_id((struct cpuid *) &mach->cpuid);
944         mach->ibc = sclp.ibc;
945         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
946                S390_ARCH_FAC_LIST_SIZE_BYTE);
947         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
948                sizeof(S390_lowcore.stfle_fac_list));
949         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
950                  kvm->arch.model.ibc,
951                  kvm->arch.model.cpuid);
952         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
953                  mach->fac_mask[0],
954                  mach->fac_mask[1],
955                  mach->fac_mask[2]);
956         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
957                  mach->fac_list[0],
958                  mach->fac_list[1],
959                  mach->fac_list[2]);
960         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
961                 ret = -EFAULT;
962         kfree(mach);
963 out:
964         return ret;
965 }
966
967 static int kvm_s390_get_processor_feat(struct kvm *kvm,
968                                        struct kvm_device_attr *attr)
969 {
970         struct kvm_s390_vm_cpu_feat data;
971
972         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
973                     KVM_S390_VM_CPU_FEAT_NR_BITS);
974         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
975                 return -EFAULT;
976         return 0;
977 }
978
979 static int kvm_s390_get_machine_feat(struct kvm *kvm,
980                                      struct kvm_device_attr *attr)
981 {
982         struct kvm_s390_vm_cpu_feat data;
983
984         bitmap_copy((unsigned long *) data.feat,
985                     kvm_s390_available_cpu_feat,
986                     KVM_S390_VM_CPU_FEAT_NR_BITS);
987         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
988                 return -EFAULT;
989         return 0;
990 }
991
992 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
993                                           struct kvm_device_attr *attr)
994 {
995         /*
996          * Once we can actually configure subfunctions (kernel + hw support),
997          * we have to check if they were already set by user space, if so copy
998          * them from kvm->arch.
999          */
1000         return -ENXIO;
1001 }
1002
1003 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1004                                         struct kvm_device_attr *attr)
1005 {
1006         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1007             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1008                 return -EFAULT;
1009         return 0;
1010 }
1011 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1012 {
1013         int ret = -ENXIO;
1014
1015         switch (attr->attr) {
1016         case KVM_S390_VM_CPU_PROCESSOR:
1017                 ret = kvm_s390_get_processor(kvm, attr);
1018                 break;
1019         case KVM_S390_VM_CPU_MACHINE:
1020                 ret = kvm_s390_get_machine(kvm, attr);
1021                 break;
1022         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1023                 ret = kvm_s390_get_processor_feat(kvm, attr);
1024                 break;
1025         case KVM_S390_VM_CPU_MACHINE_FEAT:
1026                 ret = kvm_s390_get_machine_feat(kvm, attr);
1027                 break;
1028         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1029                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1030                 break;
1031         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1032                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1033                 break;
1034         }
1035         return ret;
1036 }
1037
1038 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1039 {
1040         int ret;
1041
1042         switch (attr->group) {
1043         case KVM_S390_VM_MEM_CTRL:
1044                 ret = kvm_s390_set_mem_control(kvm, attr);
1045                 break;
1046         case KVM_S390_VM_TOD:
1047                 ret = kvm_s390_set_tod(kvm, attr);
1048                 break;
1049         case KVM_S390_VM_CPU_MODEL:
1050                 ret = kvm_s390_set_cpu_model(kvm, attr);
1051                 break;
1052         case KVM_S390_VM_CRYPTO:
1053                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1054                 break;
1055         default:
1056                 ret = -ENXIO;
1057                 break;
1058         }
1059
1060         return ret;
1061 }
1062
1063 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1064 {
1065         int ret;
1066
1067         switch (attr->group) {
1068         case KVM_S390_VM_MEM_CTRL:
1069                 ret = kvm_s390_get_mem_control(kvm, attr);
1070                 break;
1071         case KVM_S390_VM_TOD:
1072                 ret = kvm_s390_get_tod(kvm, attr);
1073                 break;
1074         case KVM_S390_VM_CPU_MODEL:
1075                 ret = kvm_s390_get_cpu_model(kvm, attr);
1076                 break;
1077         default:
1078                 ret = -ENXIO;
1079                 break;
1080         }
1081
1082         return ret;
1083 }
1084
1085 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1086 {
1087         int ret;
1088
1089         switch (attr->group) {
1090         case KVM_S390_VM_MEM_CTRL:
1091                 switch (attr->attr) {
1092                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1093                 case KVM_S390_VM_MEM_CLR_CMMA:
1094                         ret = sclp.has_cmma ? 0 : -ENXIO;
1095                         break;
1096                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1097                         ret = 0;
1098                         break;
1099                 default:
1100                         ret = -ENXIO;
1101                         break;
1102                 }
1103                 break;
1104         case KVM_S390_VM_TOD:
1105                 switch (attr->attr) {
1106                 case KVM_S390_VM_TOD_LOW:
1107                 case KVM_S390_VM_TOD_HIGH:
1108                         ret = 0;
1109                         break;
1110                 default:
1111                         ret = -ENXIO;
1112                         break;
1113                 }
1114                 break;
1115         case KVM_S390_VM_CPU_MODEL:
1116                 switch (attr->attr) {
1117                 case KVM_S390_VM_CPU_PROCESSOR:
1118                 case KVM_S390_VM_CPU_MACHINE:
1119                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1120                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1121                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1122                         ret = 0;
1123                         break;
1124                 /* configuring subfunctions is not supported yet */
1125                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1126                 default:
1127                         ret = -ENXIO;
1128                         break;
1129                 }
1130                 break;
1131         case KVM_S390_VM_CRYPTO:
1132                 switch (attr->attr) {
1133                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1134                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1135                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1136                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1137                         ret = 0;
1138                         break;
1139                 default:
1140                         ret = -ENXIO;
1141                         break;
1142                 }
1143                 break;
1144         default:
1145                 ret = -ENXIO;
1146                 break;
1147         }
1148
1149         return ret;
1150 }
1151
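/*
 * Read up to KVM_S390_SKEYS_MAX guest storage keys into a temporary buffer
 * and copy them to user space. Returns KVM_S390_GET_SKEYS_NONE if the guest
 * has never enabled storage keys.
 */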
1152 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1153 {
1154         uint8_t *keys;
1155         uint64_t hva;
1156         int i, r = 0;
1157
1158         if (args->flags != 0)
1159                 return -EINVAL;
1160
1161         /* Is this guest using storage keys? */
1162         if (!mm_use_skey(current->mm))
1163                 return KVM_S390_GET_SKEYS_NONE;
1164
1165         /* Enforce sane limit on memory allocation */
1166         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1167                 return -EINVAL;
1168
1169         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1170         if (!keys)
1171                 return -ENOMEM;
1172
1173         down_read(&current->mm->mmap_sem);
1174         for (i = 0; i < args->count; i++) {
1175                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1176                 if (kvm_is_error_hva(hva)) {
1177                         r = -EFAULT;
1178                         break;
1179                 }
1180
1181                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1182                 if (r)
1183                         break;
1184         }
1185         up_read(&current->mm->mmap_sem);
1186
1187         if (!r) {
1188                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1189                                  sizeof(uint8_t) * args->count);
1190                 if (r)
1191                         r = -EFAULT;
1192         }
1193
1194         kvfree(keys);
1195         return r;
1196 }
1197
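/*
 * Install guest storage keys supplied by user space, enabling storage key
 * handling for the guest on first use. Keys with the reserved low order
 * bit set are rejected.
 */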
1198 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1199 {
1200         uint8_t *keys;
1201         uint64_t hva;
1202         int i, r = 0;
1203
1204         if (args->flags != 0)
1205                 return -EINVAL;
1206
1207         /* Enforce sane limit on memory allocation */
1208         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1209                 return -EINVAL;
1210
1211         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1212         if (!keys)
1213                 return -ENOMEM;
1214
1215         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1216                            sizeof(uint8_t) * args->count);
1217         if (r) {
1218                 r = -EFAULT;
1219                 goto out;
1220         }
1221
1222         /* Enable storage key handling for the guest */
1223         r = s390_enable_skey();
1224         if (r)
1225                 goto out;
1226
1227         down_read(&current->mm->mmap_sem);
1228         for (i = 0; i < args->count; i++) {
1229                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1230                 if (kvm_is_error_hva(hva)) {
1231                         r = -EFAULT;
1232                         break;
1233                 }
1234
1235                 /* Lowest order bit is reserved */
1236                 if (keys[i] & 0x01) {
1237                         r = -EINVAL;
1238                         break;
1239                 }
1240
1241                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1242                 if (r)
1243                         break;
1244         }
1245         up_read(&current->mm->mmap_sem);
1246 out:
1247         kvfree(keys);
1248         return r;
1249 }
1250
1251 long kvm_arch_vm_ioctl(struct file *filp,
1252                        unsigned int ioctl, unsigned long arg)
1253 {
1254         struct kvm *kvm = filp->private_data;
1255         void __user *argp = (void __user *)arg;
1256         struct kvm_device_attr attr;
1257         int r;
1258
1259         switch (ioctl) {
1260         case KVM_S390_INTERRUPT: {
1261                 struct kvm_s390_interrupt s390int;
1262
1263                 r = -EFAULT;
1264                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1265                         break;
1266                 r = kvm_s390_inject_vm(kvm, &s390int);
1267                 break;
1268         }
1269         case KVM_ENABLE_CAP: {
1270                 struct kvm_enable_cap cap;
1271                 r = -EFAULT;
1272                 if (copy_from_user(&cap, argp, sizeof(cap)))
1273                         break;
1274                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1275                 break;
1276         }
1277         case KVM_CREATE_IRQCHIP: {
1278                 struct kvm_irq_routing_entry routing;
1279
1280                 r = -EINVAL;
1281                 if (kvm->arch.use_irqchip) {
1282                         /* Set up dummy routing. */
1283                         memset(&routing, 0, sizeof(routing));
1284                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1285                 }
1286                 break;
1287         }
1288         case KVM_SET_DEVICE_ATTR: {
1289                 r = -EFAULT;
1290                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1291                         break;
1292                 r = kvm_s390_vm_set_attr(kvm, &attr);
1293                 break;
1294         }
1295         case KVM_GET_DEVICE_ATTR: {
1296                 r = -EFAULT;
1297                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1298                         break;
1299                 r = kvm_s390_vm_get_attr(kvm, &attr);
1300                 break;
1301         }
1302         case KVM_HAS_DEVICE_ATTR: {
1303                 r = -EFAULT;
1304                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1305                         break;
1306                 r = kvm_s390_vm_has_attr(kvm, &attr);
1307                 break;
1308         }
1309         case KVM_S390_GET_SKEYS: {
1310                 struct kvm_s390_skeys args;
1311
1312                 r = -EFAULT;
1313                 if (copy_from_user(&args, argp,
1314                                    sizeof(struct kvm_s390_skeys)))
1315                         break;
1316                 r = kvm_s390_get_skeys(kvm, &args);
1317                 break;
1318         }
1319         case KVM_S390_SET_SKEYS: {
1320                 struct kvm_s390_skeys args;
1321
1322                 r = -EFAULT;
1323                 if (copy_from_user(&args, argp,
1324                                    sizeof(struct kvm_s390_skeys)))
1325                         break;
1326                 r = kvm_s390_set_skeys(kvm, &args);
1327                 break;
1328         }
1329         default:
1330                 r = -ENOTTY;
1331         }
1332
1333         return r;
1334 }
1335
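/*
 * Issue PQAP with the QCI function code to retrieve the 128 byte AP
 * configuration info block; returns the resulting condition code.
 */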
1336 static int kvm_s390_query_ap_config(u8 *config)
1337 {
1338         u32 fcn_code = 0x04000000UL;
1339         u32 cc = 0;
1340
1341         memset(config, 0, 128);
1342         asm volatile(
1343                 "lgr 0,%1\n"
1344                 "lgr 2,%2\n"
1345                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1346                 "0: ipm %0\n"
1347                 "srl %0,28\n"
1348                 "1:\n"
1349                 EX_TABLE(0b, 1b)
1350                 : "+r" (cc)
1351                 : "r" (fcn_code), "r" (config)
1352                 : "cc", "0", "2", "memory"
1353         );
1354
1355         return cc;
1356 }
1357
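/*
 * APXA (AP extended addressing) is reported in byte 0 of the QCI block;
 * if facility 12 is not available we assume it is absent.
 */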
1358 static int kvm_s390_apxa_installed(void)
1359 {
1360         u8 config[128];
1361         int cc;
1362
1363         if (test_facility(12)) {
1364                 cc = kvm_s390_query_ap_config(config);
1365
1366                 if (cc)
1367                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1368                 else
1369                         return config[0] & 0x40;
1370         }
1371
1372         return 0;
1373 }
1374
1375 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1376 {
1377         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1378
1379         if (kvm_s390_apxa_installed())
1380                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1381         else
1382                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1383 }
1384
1385 static u64 kvm_s390_get_initial_cpuid(void)
1386 {
1387         struct cpuid cpuid;
1388
1389         get_cpu_id(&cpuid);
1390         cpuid.version = 0xff;
1391         return *((u64 *) &cpuid);
1392 }
1393
1394 static void kvm_s390_crypto_init(struct kvm *kvm)
1395 {
1396         if (!test_kvm_facility(kvm, 76))
1397                 return;
1398
1399         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1400         kvm_s390_set_crycb_format(kvm);
1401
1402         /* Enable AES/DEA protected key functions by default */
1403         kvm->arch.crypto.aes_kw = 1;
1404         kvm->arch.crypto.dea_kw = 1;
1405         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1406                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1407         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1408                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1409 }
1410
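/* Free the SCA; an extended SCA spans multiple pages, a basic SCA one page. */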
1411 static void sca_dispose(struct kvm *kvm)
1412 {
1413         if (kvm->arch.use_esca)
1414                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1415         else
1416                 free_page((unsigned long)(kvm->arch.sca));
1417         kvm->arch.sca = NULL;
1418 }
1419
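/*
 * Create a new VM: allocate the SCA and the debug feature, set up the
 * facility mask/list, crypto control block and (unless this is a ucontrol
 * VM) the guest address space (gmap).
 */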
1420 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1421 {
1422         gfp_t alloc_flags = GFP_KERNEL;
1423         int i, rc;
1424         char debug_name[16];
1425         static unsigned long sca_offset;
1426
1427         rc = -EINVAL;
1428 #ifdef CONFIG_KVM_S390_UCONTROL
1429         if (type & ~KVM_VM_S390_UCONTROL)
1430                 goto out_err;
1431         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1432                 goto out_err;
1433 #else
1434         if (type)
1435                 goto out_err;
1436 #endif
1437
1438         rc = s390_enable_sie();
1439         if (rc)
1440                 goto out_err;
1441
1442         rc = -ENOMEM;
1443
1444         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1445
1446         kvm->arch.use_esca = 0; /* start with basic SCA */
1447         if (!sclp.has_64bscao)
1448                 alloc_flags |= GFP_DMA;
1449         rwlock_init(&kvm->arch.sca_lock);
1450         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1451         if (!kvm->arch.sca)
1452                 goto out_err;
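        /*
         * Stagger the SCAs of different VMs within the page in 16 byte
         * steps so they do not all end up at the same page offset.
         */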
1453         spin_lock(&kvm_lock);
1454         sca_offset += 16;
1455         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1456                 sca_offset = 0;
1457         kvm->arch.sca = (struct bsca_block *)
1458                         ((char *) kvm->arch.sca + sca_offset);
1459         spin_unlock(&kvm_lock);
1460
1461         sprintf(debug_name, "kvm-%u", current->pid);
1462
1463         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1464         if (!kvm->arch.dbf)
1465                 goto out_err;
1466
1467         kvm->arch.sie_page2 =
1468              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1469         if (!kvm->arch.sie_page2)
1470                 goto out_err;
1471
1472         /* Populate the facility mask initially. */
1473         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1474                sizeof(S390_lowcore.stfle_fac_list));
1475         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1476                 if (i < kvm_s390_fac_list_mask_size())
1477                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1478                 else
1479                         kvm->arch.model.fac_mask[i] = 0UL;
1480         }
1481
1482         /* Populate the facility list initially. */
1483         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1484         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1485                S390_ARCH_FAC_LIST_SIZE_BYTE);
1486
1487         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1488         set_kvm_facility(kvm->arch.model.fac_list, 74);
1489
1490         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1491         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1492
1493         kvm_s390_crypto_init(kvm);
1494
1495         spin_lock_init(&kvm->arch.float_int.lock);
1496         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1497                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1498         init_waitqueue_head(&kvm->arch.ipte_wq);
1499         mutex_init(&kvm->arch.ipte_mutex);
1500
1501         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1502         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1503
1504         if (type & KVM_VM_S390_UCONTROL) {
1505                 kvm->arch.gmap = NULL;
1506                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1507         } else {
1508                 if (sclp.hamax == U64_MAX)
1509                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1510                 else
1511                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1512                                                     sclp.hamax + 1);
1513                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1514                 if (!kvm->arch.gmap)
1515                         goto out_err;
1516                 kvm->arch.gmap->private = kvm;
1517                 kvm->arch.gmap->pfault_enabled = 0;
1518         }
1519
1520         kvm->arch.css_support = 0;
1521         kvm->arch.use_irqchip = 0;
1522         kvm->arch.epoch = 0;
1523
1524         spin_lock_init(&kvm->arch.start_stop_lock);
1525         kvm_s390_vsie_init(kvm);
1526         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1527
1528         return 0;
1529 out_err:
1530         free_page((unsigned long)kvm->arch.sie_page2);
1531         debug_unregister(kvm->arch.dbf);
1532         sca_dispose(kvm);
1533         KVM_EVENT(3, "creation of vm failed: %d", rc);
1534         return rc;
1535 }
1536
1537 bool kvm_arch_has_vcpu_debugfs(void)
1538 {
1539         return false;
1540 }
1541
1542 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1543 {
1544         return 0;
1545 }
1546
1547 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1548 {
1549         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1550         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1551         kvm_s390_clear_local_irqs(vcpu);
1552         kvm_clear_async_pf_completion_queue(vcpu);
1553         if (!kvm_is_ucontrol(vcpu->kvm))
1554                 sca_del_vcpu(vcpu);
1555
1556         if (kvm_is_ucontrol(vcpu->kvm))
1557                 gmap_remove(vcpu->arch.gmap);
1558
1559         if (vcpu->kvm->arch.use_cmma)
1560                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1561         free_page((unsigned long)(vcpu->arch.sie_block));
1562
1563         kvm_vcpu_uninit(vcpu);
1564         kmem_cache_free(kvm_vcpu_cache, vcpu);
1565 }
1566
1567 static void kvm_free_vcpus(struct kvm *kvm)
1568 {
1569         unsigned int i;
1570         struct kvm_vcpu *vcpu;
1571
1572         kvm_for_each_vcpu(i, vcpu, kvm)
1573                 kvm_arch_vcpu_destroy(vcpu);
1574
1575         mutex_lock(&kvm->lock);
1576         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1577                 kvm->vcpus[i] = NULL;
1578
1579         atomic_set(&kvm->online_vcpus, 0);
1580         mutex_unlock(&kvm->lock);
1581 }
1582
1583 void kvm_arch_destroy_vm(struct kvm *kvm)
1584 {
1585         kvm_free_vcpus(kvm);
1586         sca_dispose(kvm);
1587         debug_unregister(kvm->arch.dbf);
1588         free_page((unsigned long)kvm->arch.sie_page2);
1589         if (!kvm_is_ucontrol(kvm))
1590                 gmap_remove(kvm->arch.gmap);
1591         kvm_s390_destroy_adapters(kvm);
1592         kvm_s390_clear_float_irqs(kvm);
1593         kvm_s390_vsie_destroy(kvm);
1594         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1595 }
1596
1597 /* Section: vcpu related */
1598 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1599 {
1600         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1601         if (!vcpu->arch.gmap)
1602                 return -ENOMEM;
1603         vcpu->arch.gmap->private = vcpu->kvm;
1604
1605         return 0;
1606 }
1607
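     /* Drop the vcpu's entry (mcn bit and SDA) from the basic or extended SCA. */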
1608 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1609 {
1610         if (!kvm_s390_use_sca_entries())
1611                 return;
1612         read_lock(&vcpu->kvm->arch.sca_lock);
1613         if (vcpu->kvm->arch.use_esca) {
1614                 struct esca_block *sca = vcpu->kvm->arch.sca;
1615
1616                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1617                 sca->cpu[vcpu->vcpu_id].sda = 0;
1618         } else {
1619                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1620
1621                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1622                 sca->cpu[vcpu->vcpu_id].sda = 0;
1623         }
1624         read_unlock(&vcpu->kvm->arch.sca_lock);
1625 }
1626
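     /* Hook the vcpu's SIE block into the SCA and let the SIE block point back at the SCA. */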
1627 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1628 {
1629         if (!kvm_s390_use_sca_entries()) {
1630                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1631
1632                 /* we still need the basic sca for the ipte control */
1633                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1634                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1635         }
1636         read_lock(&vcpu->kvm->arch.sca_lock);
1637         if (vcpu->kvm->arch.use_esca) {
1638                 struct esca_block *sca = vcpu->kvm->arch.sca;
1639
1640                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1641                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1642                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1643                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1644                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1645         } else {
1646                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1647
1648                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1649                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1650                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1651                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1652         }
1653         read_unlock(&vcpu->kvm->arch.sca_lock);
1654 }
1655
1656 /* Basic SCA to Extended SCA data copy routines */
1657 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1658 {
1659         d->sda = s->sda;
1660         d->sigp_ctrl.c = s->sigp_ctrl.c;
1661         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1662 }
1663
1664 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1665 {
1666         int i;
1667
1668         d->ipte_control = s->ipte_control;
1669         d->mcn[0] = s->mcn;
1670         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1671                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1672 }
1673
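     /* Replace the basic SCA with an extended SCA; all vcpus are blocked while their SIE blocks are repointed. */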
1674 static int sca_switch_to_extended(struct kvm *kvm)
1675 {
1676         struct bsca_block *old_sca = kvm->arch.sca;
1677         struct esca_block *new_sca;
1678         struct kvm_vcpu *vcpu;
1679         unsigned int vcpu_idx;
1680         u32 scaol, scaoh;
1681
1682         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1683         if (!new_sca)
1684                 return -ENOMEM;
1685
1686         scaoh = (u32)((u64)(new_sca) >> 32);
1687         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1688
1689         kvm_s390_vcpu_block_all(kvm);
1690         write_lock(&kvm->arch.sca_lock);
1691
1692         sca_copy_b_to_e(new_sca, old_sca);
1693
1694         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1695                 vcpu->arch.sie_block->scaoh = scaoh;
1696                 vcpu->arch.sie_block->scaol = scaol;
1697                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1698         }
1699         kvm->arch.sca = new_sca;
1700         kvm->arch.use_esca = 1;
1701
1702         write_unlock(&kvm->arch.sca_lock);
1703         kvm_s390_vcpu_unblock_all(kvm);
1704
1705         free_page((unsigned long)old_sca);
1706
1707         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1708                  old_sca, kvm->arch.sca);
1709         return 0;
1710 }
1711
1712 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1713 {
1714         int rc;
1715
1716         if (!kvm_s390_use_sca_entries()) {
1717                 if (id < KVM_MAX_VCPUS)
1718                         return true;
1719                 return false;
1720         }
1721         if (id < KVM_S390_BSCA_CPU_SLOTS)
1722                 return true;
1723         if (!sclp.has_esca || !sclp.has_64bscao)
1724                 return false;
1725
1726         mutex_lock(&kvm->lock);
1727         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1728         mutex_unlock(&kvm->lock);
1729
1730         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1731 }
1732
1733 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1734 {
1735         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1736         kvm_clear_async_pf_completion_queue(vcpu);
1737         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1738                                     KVM_SYNC_GPRS |
1739                                     KVM_SYNC_ACRS |
1740                                     KVM_SYNC_CRS |
1741                                     KVM_SYNC_ARCH0 |
1742                                     KVM_SYNC_PFAULT;
1743         kvm_s390_set_prefix(vcpu, 0);
1744         if (test_kvm_facility(vcpu->kvm, 64))
1745                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1746         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1747          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1748          */
1749         if (MACHINE_HAS_VX)
1750                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1751         else
1752                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1753
1754         if (kvm_is_ucontrol(vcpu->kvm))
1755                 return __kvm_ucontrol_vcpu_init(vcpu);
1756
1757         return 0;
1758 }
1759
1760 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1761 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1762 {
1763         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1764         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1765         vcpu->arch.cputm_start = get_tod_clock_fast();
1766         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1767 }
1768
1769 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1770 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1771 {
1772         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1773         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1774         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1775         vcpu->arch.cputm_start = 0;
1776         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1777 }
1778
1779 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1780 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1781 {
1782         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1783         vcpu->arch.cputm_enabled = true;
1784         __start_cpu_timer_accounting(vcpu);
1785 }
1786
1787 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1788 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1789 {
1790         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1791         __stop_cpu_timer_accounting(vcpu);
1792         vcpu->arch.cputm_enabled = false;
1793 }
1794
1795 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1796 {
1797         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1798         __enable_cpu_timer_accounting(vcpu);
1799         preempt_enable();
1800 }
1801
1802 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1803 {
1804         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1805         __disable_cpu_timer_accounting(vcpu);
1806         preempt_enable();
1807 }
1808
1809 /* set the cpu timer - may only be called from the VCPU thread itself */
1810 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1811 {
1812         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1813         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1814         if (vcpu->arch.cputm_enabled)
1815                 vcpu->arch.cputm_start = get_tod_clock_fast();
1816         vcpu->arch.sie_block->cputm = cputm;
1817         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1818         preempt_enable();
1819 }
1820
1821 /* update and get the cpu timer - can also be called from other VCPU threads */
1822 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1823 {
1824         unsigned int seq;
1825         __u64 value;
1826
1827         if (unlikely(!vcpu->arch.cputm_enabled))
1828                 return vcpu->arch.sie_block->cputm;
1829
1830         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1831         do {
1832                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1833                 /*
1834                  * If the writer would ever execute a read in the critical
1835                  * section, e.g. in irq context, we have a deadlock.
1836                  */
1837                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1838                 value = vcpu->arch.sie_block->cputm;
1839                 /* if cputm_start is 0, accounting is being started/stopped */
1840                 if (likely(vcpu->arch.cputm_start))
1841                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1842         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1843         preempt_enable();
1844         return value;
1845 }
1846
1847 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1848 {
1849
1850         gmap_enable(vcpu->arch.enabled_gmap);
1851         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1852         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1853                 __start_cpu_timer_accounting(vcpu);
1854         vcpu->cpu = cpu;
1855 }
1856
1857 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1858 {
1859         vcpu->cpu = -1;
1860         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1861                 __stop_cpu_timer_accounting(vcpu);
1862         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1863         vcpu->arch.enabled_gmap = gmap_get_enabled();
1864         gmap_disable(vcpu->arch.enabled_gmap);
1865
1866 }
1867
1868 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1869 {
1870         /* this equals the initial cpu reset in POP, but we don't switch to ESA */
1871         vcpu->arch.sie_block->gpsw.mask = 0UL;
1872         vcpu->arch.sie_block->gpsw.addr = 0UL;
1873         kvm_s390_set_prefix(vcpu, 0);
1874         kvm_s390_set_cpu_timer(vcpu, 0);
1875         vcpu->arch.sie_block->ckc       = 0UL;
1876         vcpu->arch.sie_block->todpr     = 0;
1877         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1878         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1879         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1880         /* make sure the new fpc will be lazily loaded */
1881         save_fpu_regs();
1882         current->thread.fpu.fpc = 0;
1883         vcpu->arch.sie_block->gbea = 1;
1884         vcpu->arch.sie_block->pp = 0;
1885         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1886         kvm_clear_async_pf_completion_queue(vcpu);
1887         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1888                 kvm_s390_vcpu_stop(vcpu);
1889         kvm_s390_clear_local_irqs(vcpu);
1890 }
1891
1892 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1893 {
1894         mutex_lock(&vcpu->kvm->lock);
1895         preempt_disable();
1896         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1897         preempt_enable();
1898         mutex_unlock(&vcpu->kvm->lock);
1899         if (!kvm_is_ucontrol(vcpu->kvm)) {
1900                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1901                 sca_add_vcpu(vcpu);
1902         }
1903         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1904                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1905         /* make vcpu_load load the right gmap on the first trigger */
1906         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1907 }
1908
1909 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1910 {
1911         if (!test_kvm_facility(vcpu->kvm, 76))
1912                 return;
1913
1914         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1915
1916         if (vcpu->kvm->arch.crypto.aes_kw)
1917                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1918         if (vcpu->kvm->arch.crypto.dea_kw)
1919                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1920
1921         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1922 }
1923
1924 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1925 {
1926         free_page(vcpu->arch.sie_block->cbrlo);
1927         vcpu->arch.sie_block->cbrlo = 0;
1928 }
1929
1930 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1931 {
1932         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1933         if (!vcpu->arch.sie_block->cbrlo)
1934                 return -ENOMEM;
1935
1936         vcpu->arch.sie_block->ecb2 |= 0x80;
1937         vcpu->arch.sie_block->ecb2 &= ~0x08;
1938         return 0;
1939 }
1940
1941 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1942 {
1943         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1944
1945         vcpu->arch.sie_block->ibc = model->ibc;
1946         if (test_kvm_facility(vcpu->kvm, 7))
1947                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1948 }
1949
1950 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1951 {
1952         int rc = 0;
1953
1954         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1955                                                     CPUSTAT_SM |
1956                                                     CPUSTAT_STOPPED);
1957
1958         if (test_kvm_facility(vcpu->kvm, 78))
1959                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1960         else if (test_kvm_facility(vcpu->kvm, 8))
1961                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1962
1963         kvm_s390_vcpu_setup_model(vcpu);
1964
1965         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1966         if (MACHINE_HAS_ESOP)
1967                 vcpu->arch.sie_block->ecb |= 0x02;
1968         if (test_kvm_facility(vcpu->kvm, 9))
1969                 vcpu->arch.sie_block->ecb |= 0x04;
1970         if (test_kvm_facility(vcpu->kvm, 73))
1971                 vcpu->arch.sie_block->ecb |= 0x10;
1972
1973         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1974                 vcpu->arch.sie_block->ecb2 |= 0x08;
1975         if (test_kvm_facility(vcpu->kvm, 130))
1976                 vcpu->arch.sie_block->ecb2 |= 0x20;
1977         vcpu->arch.sie_block->eca = 0x1002000U;
1978         if (sclp.has_cei)
1979                 vcpu->arch.sie_block->eca |= 0x80000000U;
1980         if (sclp.has_ib)
1981                 vcpu->arch.sie_block->eca |= 0x40000000U;
1982         if (sclp.has_siif)
1983                 vcpu->arch.sie_block->eca |= 1;
1984         if (sclp.has_sigpif)
1985                 vcpu->arch.sie_block->eca |= 0x10000000U;
1986         if (test_kvm_facility(vcpu->kvm, 129)) {
1987                 vcpu->arch.sie_block->eca |= 0x00020000;
1988                 vcpu->arch.sie_block->ecd |= 0x20000000;
1989         }
1990         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1991         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1992
1993         if (vcpu->kvm->arch.use_cmma) {
1994                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1995                 if (rc)
1996                         return rc;
1997         }
1998         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1999         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2000
2001         kvm_s390_vcpu_crypto_setup(vcpu);
2002
2003         return rc;
2004 }
2005
2006 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2007                                       unsigned int id)
2008 {
2009         struct kvm_vcpu *vcpu;
2010         struct sie_page *sie_page;
2011         int rc = -EINVAL;
2012
2013         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2014                 goto out;
2015
2016         rc = -ENOMEM;
2017
2018         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2019         if (!vcpu)
2020                 goto out;
2021
2022         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2023         if (!sie_page)
2024                 goto out_free_cpu;
2025
2026         vcpu->arch.sie_block = &sie_page->sie_block;
2027         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2028
2029         /* the real guest size will always be smaller than msl */
2030         vcpu->arch.sie_block->mso = 0;
2031         vcpu->arch.sie_block->msl = sclp.hamax;
2032
2033         vcpu->arch.sie_block->icpua = id;
2034         spin_lock_init(&vcpu->arch.local_int.lock);
2035         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2036         vcpu->arch.local_int.wq = &vcpu->wq;
2037         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2038         seqcount_init(&vcpu->arch.cputm_seqcount);
2039
2040         rc = kvm_vcpu_init(vcpu, kvm, id);
2041         if (rc)
2042                 goto out_free_sie_block;
2043         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2044                  vcpu->arch.sie_block);
2045         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2046
2047         return vcpu;
2048 out_free_sie_block:
2049         free_page((unsigned long)(vcpu->arch.sie_block));
2050 out_free_cpu:
2051         kmem_cache_free(kvm_vcpu_cache, vcpu);
2052 out:
2053         return ERR_PTR(rc);
2054 }
2055
2056 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2057 {
2058         return kvm_s390_vcpu_has_irq(vcpu, 0);
2059 }
2060
2061 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2062 {
2063         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2064         exit_sie(vcpu);
2065 }
2066
2067 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2068 {
2069         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2070 }
2071
2072 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2073 {
2074         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2075         exit_sie(vcpu);
2076 }
2077
2078 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2079 {
2080         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2081 }
2082
2083 /*
2084  * Kick a guest cpu out of SIE and wait until SIE is not running.
2085  * If the CPU is not running (e.g. waiting as idle) the function will
2086  * return immediately. */
2087 void exit_sie(struct kvm_vcpu *vcpu)
2088 {
2089         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2090         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2091                 cpu_relax();
2092 }
2093
2094 /* Kick a guest cpu out of SIE to process a request synchronously */
2095 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2096 {
2097         kvm_make_request(req, vcpu);
2098         kvm_s390_vcpu_request(vcpu);
2099 }
2100
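     /* gmap notifier: request KVM_REQ_MMU_RELOAD for every vcpu whose prefix pages intersect the invalidated range. */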
2101 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2102                               unsigned long end)
2103 {
2104         struct kvm *kvm = gmap->private;
2105         struct kvm_vcpu *vcpu;
2106         unsigned long prefix;
2107         int i;
2108
2109         if (gmap_is_shadow(gmap))
2110                 return;
2111         if (start >= 1UL << 31)
2112                 /* We are only interested in prefix pages */
2113                 return;
2114         kvm_for_each_vcpu(i, vcpu, kvm) {
2115                 /* match against both prefix pages */
2116                 prefix = kvm_s390_get_prefix(vcpu);
2117                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2118                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2119                                    start, end);
2120                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2121                 }
2122         }
2123 }
2124
2125 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2126 {
2127         /* kvm common code refers to this, but never calls it */
2128         BUG();
2129         return 0;
2130 }
2131
2132 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2133                                            struct kvm_one_reg *reg)
2134 {
2135         int r = -EINVAL;
2136
2137         switch (reg->id) {
2138         case KVM_REG_S390_TODPR:
2139                 r = put_user(vcpu->arch.sie_block->todpr,
2140                              (u32 __user *)reg->addr);
2141                 break;
2142         case KVM_REG_S390_EPOCHDIFF:
2143                 r = put_user(vcpu->arch.sie_block->epoch,
2144                              (u64 __user *)reg->addr);
2145                 break;
2146         case KVM_REG_S390_CPU_TIMER:
2147                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2148                              (u64 __user *)reg->addr);
2149                 break;
2150         case KVM_REG_S390_CLOCK_COMP:
2151                 r = put_user(vcpu->arch.sie_block->ckc,
2152                              (u64 __user *)reg->addr);
2153                 break;
2154         case KVM_REG_S390_PFTOKEN:
2155                 r = put_user(vcpu->arch.pfault_token,
2156                              (u64 __user *)reg->addr);
2157                 break;
2158         case KVM_REG_S390_PFCOMPARE:
2159                 r = put_user(vcpu->arch.pfault_compare,
2160                              (u64 __user *)reg->addr);
2161                 break;
2162         case KVM_REG_S390_PFSELECT:
2163                 r = put_user(vcpu->arch.pfault_select,
2164                              (u64 __user *)reg->addr);
2165                 break;
2166         case KVM_REG_S390_PP:
2167                 r = put_user(vcpu->arch.sie_block->pp,
2168                              (u64 __user *)reg->addr);
2169                 break;
2170         case KVM_REG_S390_GBEA:
2171                 r = put_user(vcpu->arch.sie_block->gbea,
2172                              (u64 __user *)reg->addr);
2173                 break;
2174         default:
2175                 break;
2176         }
2177
2178         return r;
2179 }
2180
2181 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2182                                            struct kvm_one_reg *reg)
2183 {
2184         int r = -EINVAL;
2185         __u64 val;
2186
2187         switch (reg->id) {
2188         case KVM_REG_S390_TODPR:
2189                 r = get_user(vcpu->arch.sie_block->todpr,
2190                              (u32 __user *)reg->addr);
2191                 break;
2192         case KVM_REG_S390_EPOCHDIFF:
2193                 r = get_user(vcpu->arch.sie_block->epoch,
2194                              (u64 __user *)reg->addr);
2195                 break;
2196         case KVM_REG_S390_CPU_TIMER:
2197                 r = get_user(val, (u64 __user *)reg->addr);
2198                 if (!r)
2199                         kvm_s390_set_cpu_timer(vcpu, val);
2200                 break;
2201         case KVM_REG_S390_CLOCK_COMP:
2202                 r = get_user(vcpu->arch.sie_block->ckc,
2203                              (u64 __user *)reg->addr);
2204                 break;
2205         case KVM_REG_S390_PFTOKEN:
2206                 r = get_user(vcpu->arch.pfault_token,
2207                              (u64 __user *)reg->addr);
2208                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2209                         kvm_clear_async_pf_completion_queue(vcpu);
2210                 break;
2211         case KVM_REG_S390_PFCOMPARE:
2212                 r = get_user(vcpu->arch.pfault_compare,
2213                              (u64 __user *)reg->addr);
2214                 break;
2215         case KVM_REG_S390_PFSELECT:
2216                 r = get_user(vcpu->arch.pfault_select,
2217                              (u64 __user *)reg->addr);
2218                 break;
2219         case KVM_REG_S390_PP:
2220                 r = get_user(vcpu->arch.sie_block->pp,
2221                              (u64 __user *)reg->addr);
2222                 break;
2223         case KVM_REG_S390_GBEA:
2224                 r = get_user(vcpu->arch.sie_block->gbea,
2225                              (u64 __user *)reg->addr);
2226                 break;
2227         default:
2228                 break;
2229         }
2230
2231         return r;
2232 }
2233
2234 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2235 {
2236         kvm_s390_vcpu_initial_reset(vcpu);
2237         return 0;
2238 }
2239
2240 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2241 {
2242         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2243         return 0;
2244 }
2245
2246 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2247 {
2248         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2249         return 0;
2250 }
2251
2252 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2253                                   struct kvm_sregs *sregs)
2254 {
2255         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2256         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2257         return 0;
2258 }
2259
2260 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2261                                   struct kvm_sregs *sregs)
2262 {
2263         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2264         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2265         return 0;
2266 }
2267
2268 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2269 {
2270         if (test_fp_ctl(fpu->fpc))
2271                 return -EINVAL;
2272         vcpu->run->s.regs.fpc = fpu->fpc;
2273         if (MACHINE_HAS_VX)
2274                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2275                                  (freg_t *) fpu->fprs);
2276         else
2277                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2278         return 0;
2279 }
2280
2281 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2282 {
2283         /* make sure we have the latest values */
2284         save_fpu_regs();
2285         if (MACHINE_HAS_VX)
2286                 convert_vx_to_fp((freg_t *) fpu->fprs,
2287                                  (__vector128 *) vcpu->run->s.regs.vrs);
2288         else
2289                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2290         fpu->fpc = vcpu->run->s.regs.fpc;
2291         return 0;
2292 }
2293
2294 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2295 {
2296         int rc = 0;
2297
2298         if (!is_vcpu_stopped(vcpu))
2299                 rc = -EBUSY;
2300         else {
2301                 vcpu->run->psw_mask = psw.mask;
2302                 vcpu->run->psw_addr = psw.addr;
2303         }
2304         return rc;
2305 }
2306
2307 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2308                                   struct kvm_translation *tr)
2309 {
2310         return -EINVAL; /* not implemented yet */
2311 }
2312
2313 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2314                               KVM_GUESTDBG_USE_HW_BP | \
2315                               KVM_GUESTDBG_ENABLE)
2316
2317 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2318                                         struct kvm_guest_debug *dbg)
2319 {
2320         int rc = 0;
2321
2322         vcpu->guest_debug = 0;
2323         kvm_s390_clear_bp_data(vcpu);
2324
2325         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2326                 return -EINVAL;
2327         if (!sclp.has_gpere)
2328                 return -EINVAL;
2329
2330         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2331                 vcpu->guest_debug = dbg->control;
2332                 /* enforce guest PER */
2333                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2334
2335                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2336                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2337         } else {
2338                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2339                 vcpu->arch.guestdbg.last_bp = 0;
2340         }
2341
2342         if (rc) {
2343                 vcpu->guest_debug = 0;
2344                 kvm_s390_clear_bp_data(vcpu);
2345                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2346         }
2347
2348         return rc;
2349 }
2350
2351 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2352                                     struct kvm_mp_state *mp_state)
2353 {
2354         /* CHECK_STOP and LOAD are not supported yet */
2355         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2356                                        KVM_MP_STATE_OPERATING;
2357 }
2358
2359 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2360                                     struct kvm_mp_state *mp_state)
2361 {
2362         int rc = 0;
2363
2364         /* user space knows about this interface - let it control the state */
2365         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2366
2367         switch (mp_state->mp_state) {
2368         case KVM_MP_STATE_STOPPED:
2369                 kvm_s390_vcpu_stop(vcpu);
2370                 break;
2371         case KVM_MP_STATE_OPERATING:
2372                 kvm_s390_vcpu_start(vcpu);
2373                 break;
2374         case KVM_MP_STATE_LOAD:
2375         case KVM_MP_STATE_CHECK_STOP:
2376                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2377         default:
2378                 rc = -ENXIO;
2379         }
2380
2381         return rc;
2382 }
2383
2384 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2385 {
2386         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2387 }
2388
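     /* Handle all requests pending for this vcpu before (re-)entering SIE. */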
2389 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2390 {
2391 retry:
2392         kvm_s390_vcpu_request_handled(vcpu);
2393         if (!vcpu->requests)
2394                 return 0;
2395         /*
2396          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2397          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2398          * This ensures that the ipte instruction for this request has
2399          * already finished. We might race against a second unmapper that
2400          * wants to set the blocking bit. Let's just retry the request loop.
2401          */
2402         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2403                 int rc;
2404                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2405                                           kvm_s390_get_prefix(vcpu),
2406                                           PAGE_SIZE * 2, PROT_WRITE);
2407                 if (rc) {
2408                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2409                         return rc;
2410                 }
2411                 goto retry;
2412         }
2413
2414         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2415                 vcpu->arch.sie_block->ihcpu = 0xffff;
2416                 goto retry;
2417         }
2418
2419         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2420                 if (!ibs_enabled(vcpu)) {
2421                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2422                         atomic_or(CPUSTAT_IBS,
2423                                         &vcpu->arch.sie_block->cpuflags);
2424                 }
2425                 goto retry;
2426         }
2427
2428         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2429                 if (ibs_enabled(vcpu)) {
2430                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2431                         atomic_andnot(CPUSTAT_IBS,
2432                                           &vcpu->arch.sie_block->cpuflags);
2433                 }
2434                 goto retry;
2435         }
2436
2437         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2438                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2439                 goto retry;
2440         }
2441
2442         /* nothing to do, just clear the request */
2443         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2444
2445         return 0;
2446 }
2447
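     /* Set the guest TOD clock: compute the epoch difference to the host TOD and propagate it to all vcpus. */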
2448 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2449 {
2450         struct kvm_vcpu *vcpu;
2451         int i;
2452
2453         mutex_lock(&kvm->lock);
2454         preempt_disable();
2455         kvm->arch.epoch = tod - get_tod_clock();
2456         kvm_s390_vcpu_block_all(kvm);
2457         kvm_for_each_vcpu(i, vcpu, kvm)
2458                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2459         kvm_s390_vcpu_unblock_all(kvm);
2460         preempt_enable();
2461         mutex_unlock(&kvm->lock);
2462 }
2463
2464 /**
2465  * kvm_arch_fault_in_page - fault-in guest page if necessary
2466  * @vcpu: The corresponding virtual cpu
2467  * @gpa: Guest physical address
2468  * @writable: Whether the page should be writable or not
2469  *
2470  * Make sure that a guest page has been faulted-in on the host.
2471  *
2472  * Return: Zero on success, negative error code otherwise.
2473  */
2474 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2475 {
2476         return gmap_fault(vcpu->arch.gmap, gpa,
2477                           writable ? FAULT_FLAG_WRITE : 0);
2478 }
2479
2480 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2481                                       unsigned long token)
2482 {
2483         struct kvm_s390_interrupt inti;
2484         struct kvm_s390_irq irq;
2485
2486         if (start_token) {
2487                 irq.u.ext.ext_params2 = token;
2488                 irq.type = KVM_S390_INT_PFAULT_INIT;
2489                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2490         } else {
2491                 inti.type = KVM_S390_INT_PFAULT_DONE;
2492                 inti.parm64 = token;
2493                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2494         }
2495 }
2496
2497 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2498                                      struct kvm_async_pf *work)
2499 {
2500         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2501         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2502 }
2503
2504 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2505                                  struct kvm_async_pf *work)
2506 {
2507         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2508         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2509 }
2510
2511 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2512                                struct kvm_async_pf *work)
2513 {
2514         /* s390 will always inject the page directly */
2515 }
2516
2517 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2518 {
2519         /*
2520          * s390 will always inject the page directly,
2521          * but we still want check_async_completion to clean up
2522          */
2523         return true;
2524 }
2525
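     /* Set up async handling for the current host page fault, but only if the guest has pfault enabled and is currently able to take the notification. */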
2526 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2527 {
2528         hva_t hva;
2529         struct kvm_arch_async_pf arch;
2530         int rc;
2531
2532         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2533                 return 0;
2534         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2535             vcpu->arch.pfault_compare)
2536                 return 0;
2537         if (psw_extint_disabled(vcpu))
2538                 return 0;
2539         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2540                 return 0;
2541         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2542                 return 0;
2543         if (!vcpu->arch.gmap->pfault_enabled)
2544                 return 0;
2545
2546         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2547         hva += current->thread.gmap_addr & ~PAGE_MASK;
2548         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2549                 return 0;
2550
2551         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2552         return rc;
2553 }
2554
2555 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2556 {
2557         int rc, cpuflags;
2558
2559         /*
2560          * On s390, notifications for arriving pages will be delivered directly
2561          * to the guest, but the housekeeping for completed pfaults is
2562          * handled outside the worker.
2563          */
2564         kvm_check_async_pf_completion(vcpu);
2565
2566         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2567         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2568
2569         if (need_resched())
2570                 schedule();
2571
2572         if (test_cpu_flag(CIF_MCCK_PENDING))
2573                 s390_handle_mcck();
2574
2575         if (!kvm_is_ucontrol(vcpu->kvm)) {
2576                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2577                 if (rc)
2578                         return rc;
2579         }
2580
2581         rc = kvm_s390_handle_requests(vcpu);
2582         if (rc)
2583                 return rc;
2584
2585         if (guestdbg_enabled(vcpu)) {
2586                 kvm_s390_backup_guest_per_regs(vcpu);
2587                 kvm_s390_patch_guest_per_regs(vcpu);
2588         }
2589
2590         vcpu->arch.sie_block->icptcode = 0;
2591         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2592         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2593         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2594
2595         return 0;
2596 }
2597
2598 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2599 {
2600         struct kvm_s390_pgm_info pgm_info = {
2601                 .code = PGM_ADDRESSING,
2602         };
2603         u8 opcode, ilen;
2604         int rc;
2605
2606         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2607         trace_kvm_s390_sie_fault(vcpu);
2608
2609         /*
2610          * We want to inject an addressing exception, which is defined as a
2611          * suppressing or terminating exception. However, since we came here
2612          * by a DAT access exception, the PSW still points to the faulting
2613          * instruction since DAT exceptions are nullifying. So we've got
2614          * to look up the current opcode to get the length of the instruction
2615          * to be able to forward the PSW.
2616          */
2617         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
2618         ilen = insn_length(opcode);
2619         if (rc < 0) {
2620                 return rc;
2621         } else if (rc) {
2622                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2623                  * Forward by arbitrary ilc, injection will take care of
2624                  * nullification if necessary.
2625                  */
2626                 pgm_info = vcpu->arch.pgm;
2627                 ilen = 4;
2628         }
2629         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2630         kvm_s390_forward_psw(vcpu, ilen);
2631         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2632 }
2633
2634 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2635 {
2636         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2637                    vcpu->arch.sie_block->icptcode);
2638         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2639
2640         if (guestdbg_enabled(vcpu))
2641                 kvm_s390_restore_guest_per_regs(vcpu);
2642
2643         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2644         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2645
2646         if (vcpu->arch.sie_block->icptcode > 0) {
2647                 int rc = kvm_handle_sie_intercept(vcpu);
2648
2649                 if (rc != -EOPNOTSUPP)
2650                         return rc;
2651                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2652                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2653                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2654                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2655                 return -EREMOTE;
2656         } else if (exit_reason != -EFAULT) {
2657                 vcpu->stat.exit_null++;
2658                 return 0;
2659         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2660                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2661                 vcpu->run->s390_ucontrol.trans_exc_code =
2662                                                 current->thread.gmap_addr;
2663                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2664                 return -EREMOTE;
2665         } else if (current->thread.gmap_pfault) {
2666                 trace_kvm_s390_major_guest_pfault(vcpu);
2667                 current->thread.gmap_pfault = 0;
2668                 if (kvm_arch_setup_async_pf(vcpu))
2669                         return 0;
2670                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2671         }
2672         return vcpu_post_run_fault_in_sie(vcpu);
2673 }
2674
2675 static int __vcpu_run(struct kvm_vcpu *vcpu)
2676 {
2677         int rc, exit_reason;
2678
2679         /*
2680          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2681          * ning the guest), so that memslots (and other stuff) are protected
2682          */
2683         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2684
2685         do {
2686                 rc = vcpu_pre_run(vcpu);
2687                 if (rc)
2688                         break;
2689
2690                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2691                 /*
2692                  * As PF_VCPU will be used in the fault handler, there must be
2693                  * no uaccess between guest_enter and guest_exit.
2694                  */
2695                 local_irq_disable();
2696                 guest_enter_irqoff();
2697                 __disable_cpu_timer_accounting(vcpu);
2698                 local_irq_enable();
2699                 exit_reason = sie64a(vcpu->arch.sie_block,
2700                                      vcpu->run->s.regs.gprs);
2701                 local_irq_disable();
2702                 __enable_cpu_timer_accounting(vcpu);
2703                 guest_exit_irqoff();
2704                 local_irq_enable();
2705                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2706
2707                 rc = vcpu_post_run(vcpu, exit_reason);
2708         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2709
2710         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2711         return rc;
2712 }
2713
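     /* Load the register state that userspace marked dirty from kvm_run, and switch access and floating point / vector registers over to the guest. */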
2714 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2715 {
2716         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2717         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2718         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2719                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2720         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2721                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2722                 /* some control register changes require a tlb flush */
2723                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2724         }
2725         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2726                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2727                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2728                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2729                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2730                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2731         }
2732         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2733                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2734                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2735                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2736                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2737                         kvm_clear_async_pf_completion_queue(vcpu);
2738         }
2739         /*
2740          * If userspace sets the riccb (e.g. after migration) to a valid state,
2741          * we should enable RI here instead of doing the lazy enablement.
2742          */
2743         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2744             test_kvm_facility(vcpu->kvm, 64)) {
2745                 struct runtime_instr_cb *riccb =
2746                         (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2747
2748                 if (riccb->valid)
2749                         vcpu->arch.sie_block->ecb3 |= 0x01;
2750         }
2751         save_access_regs(vcpu->arch.host_acrs);
2752         restore_access_regs(vcpu->run->s.regs.acrs);
2753         /* save host (userspace) fprs/vrs */
2754         save_fpu_regs();
2755         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2756         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2757         if (MACHINE_HAS_VX)
2758                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2759         else
2760                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2761         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2762         if (test_fp_ctl(current->thread.fpu.fpc))
2763                 /* User space provided an invalid FPC, let's clear it */
2764                 current->thread.fpu.fpc = 0;
2765
2766         kvm_run->kvm_dirty_regs = 0;
2767 }
2768
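     /* Write the guest register state back into kvm_run and restore the host access and floating point / vector register state. */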
2769 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2770 {
2771         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2772         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2773         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2774         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2775         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2776         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2777         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2778         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2779         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2780         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2781         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2782         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2783         save_access_regs(vcpu->run->s.regs.acrs);
2784         restore_access_regs(vcpu->arch.host_acrs);
2785         /* Save guest register state */
2786         save_fpu_regs();
2787         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2788         /* Restore will be done lazily at return */
2789         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2790         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2791
2792 }
2793
2794 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2795 {
2796         int rc;
2797         sigset_t sigsaved;
2798
2799         if (kvm_run->immediate_exit)
2800                 return -EINTR;
2801
2802         if (guestdbg_exit_pending(vcpu)) {
2803                 kvm_s390_prepare_debug_exit(vcpu);
2804                 return 0;
2805         }
2806
2807         if (vcpu->sigset_active)
2808                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2809
2810         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2811                 kvm_s390_vcpu_start(vcpu);
2812         } else if (is_vcpu_stopped(vcpu)) {
2813                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2814                                    vcpu->vcpu_id);
2815                 return -EINVAL;
2816         }
2817
2818         sync_regs(vcpu, kvm_run);
2819         enable_cpu_timer_accounting(vcpu);
2820
2821         might_fault();
2822         rc = __vcpu_run(vcpu);
2823
2824         if (signal_pending(current) && !rc) {
2825                 kvm_run->exit_reason = KVM_EXIT_INTR;
2826                 rc = -EINTR;
2827         }
2828
2829         if (guestdbg_exit_pending(vcpu) && !rc)  {
2830                 kvm_s390_prepare_debug_exit(vcpu);
2831                 rc = 0;
2832         }
2833
2834         if (rc == -EREMOTE) {
2835                 /* userspace support is needed, kvm_run has been prepared */
2836                 rc = 0;
2837         }
2838
2839         disable_cpu_timer_accounting(vcpu);
2840         store_regs(vcpu, kvm_run);
2841
2842         if (vcpu->sigset_active)
2843                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2844
2845         vcpu->stat.exit_userspace++;
2846         return rc;
2847 }
2848
2849 /*
2850  * store status at address
2851  * we have two special cases:
2852  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2853  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2854  */
2855 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2856 {
2857         unsigned char archmode = 1;
2858         freg_t fprs[NUM_FPRS];
2859         unsigned int px;
2860         u64 clkcomp, cputm;
2861         int rc;
2862
2863         px = kvm_s390_get_prefix(vcpu);
2864         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2865                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2866                         return -EFAULT;
2867                 gpa = 0;
2868         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2869                 if (write_guest_real(vcpu, 163, &archmode, 1))
2870                         return -EFAULT;
2871                 gpa = px;
2872         } else
2873                 gpa -= __LC_FPREGS_SAVE_AREA;
2874
2875         /* manually convert vector registers if necessary */
2876         if (MACHINE_HAS_VX) {
2877                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2878                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2879                                      fprs, 128);
2880         } else {
2881                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2882                                      vcpu->run->s.regs.fprs, 128);
2883         }
2884         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2885                               vcpu->run->s.regs.gprs, 128);
2886         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2887                               &vcpu->arch.sie_block->gpsw, 16);
2888         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2889                               &px, 4);
2890         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2891                               &vcpu->run->s.regs.fpc, 4);
2892         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2893                               &vcpu->arch.sie_block->todpr, 4);
2894         cputm = kvm_s390_get_cpu_timer(vcpu);
2895         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2896                               &cputm, 8);
2897         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2898         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2899                               &clkcomp, 8);
2900         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2901                               &vcpu->run->s.regs.acrs, 64);
2902         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2903                               &vcpu->arch.sie_block->gcr, 128);
2904         return rc ? -EFAULT : 0;
2905 }
2906
2907 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2908 {
2909         /*
2910          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2911          * switch in the run ioctl. Let's update our copies before we store
2912          * them in the save area.
2913          */
2914         save_fpu_regs();
2915         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2916         save_access_regs(vcpu->run->s.regs.acrs);
2917
2918         return kvm_s390_store_status_unloaded(vcpu, addr);
2919 }
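
/*
 * Illustrative userspace sketch (assumption, not part of this file): the
 * KVM_S390_STORE_STATUS ioctl that ends up here takes a guest absolute
 * address, or one of the two special values handled above.  "vcpu_fd" is a
 * VCPU file descriptor; the ioctl is typically issued while the VCPU is
 * stopped so that a consistent register state is stored.
 *
 *      ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_PREFIXED);
 *      ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 */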
2920
2921 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2922 {
2923         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2924         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2925 }
2926
2927 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2928 {
2929         unsigned int i;
2930         struct kvm_vcpu *vcpu;
2931
2932         kvm_for_each_vcpu(i, vcpu, kvm) {
2933                 __disable_ibs_on_vcpu(vcpu);
2934         }
2935 }
2936
2937 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2938 {
2939         if (!sclp.has_ibs)
2940                 return;
2941         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2942         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2943 }
2944
2945 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2946 {
2947         int i, online_vcpus, started_vcpus = 0;
2948
2949         if (!is_vcpu_stopped(vcpu))
2950                 return;
2951
2952         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2953         /* Only one cpu at a time may enter/leave the STOPPED state. */
2954         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2955         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2956
2957         for (i = 0; i < online_vcpus; i++) {
2958                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2959                         started_vcpus++;
2960         }
2961
2962         if (started_vcpus == 0) {
2963                 /* we're the only active VCPU -> speed it up */
2964                 __enable_ibs_on_vcpu(vcpu);
2965         } else if (started_vcpus == 1) {
2966                 /*
2967                  * As we are starting a second VCPU, we have to disable
2968                  * the IBS facility on all VCPUs to remove potentially
2969                  * outstanding ENABLE requests.
2970                  */
2971                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2972         }
2973
2974         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2975         /*
2976          * Another VCPU might have used IBS while we were offline.
2977          * Let's play safe and flush the VCPU at startup.
2978          */
2979         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2980         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2981         return;
2982 }
2983
2984 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2985 {
2986         int i, online_vcpus, started_vcpus = 0;
2987         struct kvm_vcpu *started_vcpu = NULL;
2988
2989         if (is_vcpu_stopped(vcpu))
2990                 return;
2991
2992         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2993         /* Only one cpu at a time may enter/leave the STOPPED state. */
2994         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2995         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2996
2997         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2998         kvm_s390_clear_stop_irq(vcpu);
2999
3000         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3001         __disable_ibs_on_vcpu(vcpu);
3002
3003         for (i = 0; i < online_vcpus; i++) {
3004                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3005                         started_vcpus++;
3006                         started_vcpu = vcpu->kvm->vcpus[i];
3007                 }
3008         }
3009
3010         if (started_vcpus == 1) {
3011                 /*
3012                  * As we only have one VCPU left, we want to enable the
3013                  * IBS facility for that VCPU to speed it up.
3014                  */
3015                 __enable_ibs_on_vcpu(started_vcpu);
3016         }
3017
3018         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3019         return;
3020 }
3021
3022 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3023                                      struct kvm_enable_cap *cap)
3024 {
3025         int r;
3026
3027         if (cap->flags)
3028                 return -EINVAL;
3029
3030         switch (cap->cap) {
3031         case KVM_CAP_S390_CSS_SUPPORT:
3032                 if (!vcpu->kvm->arch.css_support) {
3033                         vcpu->kvm->arch.css_support = 1;
3034                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3035                         trace_kvm_s390_enable_css(vcpu->kvm);
3036                 }
3037                 r = 0;
3038                 break;
3039         default:
3040                 r = -EINVAL;
3041                 break;
3042         }
3043         return r;
3044 }
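
/*
 * Illustrative userspace sketch (assumption, not part of this file): enabling
 * the CSS support capability handled above on a VCPU file descriptor
 * "vcpu_fd".  The .flags field must be zero, as checked above.
 *
 *      struct kvm_enable_cap cap = {
 *              .cap = KVM_CAP_S390_CSS_SUPPORT,
 *      };
 *      if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
 *              perror("KVM_ENABLE_CAP");
 */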
3045
3046 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3047                                   struct kvm_s390_mem_op *mop)
3048 {
3049         void __user *uaddr = (void __user *)mop->buf;
3050         void *tmpbuf = NULL;
3051         int r, srcu_idx;
3052         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3053                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3054
3055         if (mop->flags & ~supported_flags)
3056                 return -EINVAL;
3057
3058         if (mop->size > MEM_OP_MAX_SIZE)
3059                 return -E2BIG;
3060
3061         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3062                 tmpbuf = vmalloc(mop->size);
3063                 if (!tmpbuf)
3064                         return -ENOMEM;
3065         }
3066
3067         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3068
3069         switch (mop->op) {
3070         case KVM_S390_MEMOP_LOGICAL_READ:
3071                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3072                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3073                                             mop->size, GACC_FETCH);
3074                         break;
3075                 }
3076                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3077                 if (r == 0) {
3078                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3079                                 r = -EFAULT;
3080                 }
3081                 break;
3082         case KVM_S390_MEMOP_LOGICAL_WRITE:
3083                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3084                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3085                                             mop->size, GACC_STORE);
3086                         break;
3087                 }
3088                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3089                         r = -EFAULT;
3090                         break;
3091                 }
3092                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3093                 break;
3094         default:
3095                 r = -EINVAL;
3096         }
3097
3098         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3099
3100         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3101                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3102
3103         vfree(tmpbuf);
3104         return r;
3105 }
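
/*
 * Illustrative userspace sketch (assumption, not part of this file): a
 * KVM_S390_MEM_OP logical read of 256 bytes from guest address 0x1000
 * through access register 0 into a local buffer.  Setting
 * KVM_S390_MEMOP_F_CHECK_ONLY in .flags instead performs only the access
 * check, without touching .buf.
 *
 *      unsigned char data[256];
 *      struct kvm_s390_mem_op op = {
 *              .gaddr = 0x1000,
 *              .size  = sizeof(data),
 *              .op    = KVM_S390_MEMOP_LOGICAL_READ,
 *              .buf   = (__u64)(unsigned long)data,
 *              .ar    = 0,
 *      };
 *      if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *              perror("KVM_S390_MEM_OP");
 */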
3106
3107 long kvm_arch_vcpu_ioctl(struct file *filp,
3108                          unsigned int ioctl, unsigned long arg)
3109 {
3110         struct kvm_vcpu *vcpu = filp->private_data;
3111         void __user *argp = (void __user *)arg;
3112         int idx;
3113         long r;
3114
3115         switch (ioctl) {
3116         case KVM_S390_IRQ: {
3117                 struct kvm_s390_irq s390irq;
3118
3119                 r = -EFAULT;
3120                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3121                         break;
3122                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3123                 break;
3124         }
3125         case KVM_S390_INTERRUPT: {
3126                 struct kvm_s390_interrupt s390int;
3127                 struct kvm_s390_irq s390irq;
3128
3129                 r = -EFAULT;
3130                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3131                         break;
3132                 if (s390int_to_s390irq(&s390int, &s390irq))
3133                         return -EINVAL;
3134                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3135                 break;
3136         }
3137         case KVM_S390_STORE_STATUS:
3138                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3139                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3140                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3141                 break;
3142         case KVM_S390_SET_INITIAL_PSW: {
3143                 psw_t psw;
3144
3145                 r = -EFAULT;
3146                 if (copy_from_user(&psw, argp, sizeof(psw)))
3147                         break;
3148                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3149                 break;
3150         }
3151         case KVM_S390_INITIAL_RESET:
3152                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3153                 break;
3154         case KVM_SET_ONE_REG:
3155         case KVM_GET_ONE_REG: {
3156                 struct kvm_one_reg reg;
3157                 r = -EFAULT;
3158                 if (copy_from_user(&reg, argp, sizeof(reg)))
3159                         break;
3160                 if (ioctl == KVM_SET_ONE_REG)
3161                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3162                 else
3163                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3164                 break;
3165         }
3166 #ifdef CONFIG_KVM_S390_UCONTROL
3167         case KVM_S390_UCAS_MAP: {
3168                 struct kvm_s390_ucas_mapping ucasmap;
3169
3170                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3171                         r = -EFAULT;
3172                         break;
3173                 }
3174
3175                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3176                         r = -EINVAL;
3177                         break;
3178                 }
3179
3180                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3181                                      ucasmap.vcpu_addr, ucasmap.length);
3182                 break;
3183         }
3184         case KVM_S390_UCAS_UNMAP: {
3185                 struct kvm_s390_ucas_mapping ucasmap;
3186
3187                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3188                         r = -EFAULT;
3189                         break;
3190                 }
3191
3192                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3193                         r = -EINVAL;
3194                         break;
3195                 }
3196
3197                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3198                         ucasmap.length);
3199                 break;
3200         }
3201 #endif
3202         case KVM_S390_VCPU_FAULT: {
3203                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3204                 break;
3205         }
3206         case KVM_ENABLE_CAP:
3207         {
3208                 struct kvm_enable_cap cap;
3209                 r = -EFAULT;
3210                 if (copy_from_user(&cap, argp, sizeof(cap)))
3211                         break;
3212                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3213                 break;
3214         }
3215         case KVM_S390_MEM_OP: {
3216                 struct kvm_s390_mem_op mem_op;
3217
3218                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3219                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3220                 else
3221                         r = -EFAULT;
3222                 break;
3223         }
3224         case KVM_S390_SET_IRQ_STATE: {
3225                 struct kvm_s390_irq_state irq_state;
3226
3227                 r = -EFAULT;
3228                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3229                         break;
3230                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3231                     irq_state.len == 0 ||
3232                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3233                         r = -EINVAL;
3234                         break;
3235                 }
3236                 r = kvm_s390_set_irq_state(vcpu,
3237                                            (void __user *) irq_state.buf,
3238                                            irq_state.len);
3239                 break;
3240         }
3241         case KVM_S390_GET_IRQ_STATE: {
3242                 struct kvm_s390_irq_state irq_state;
3243
3244                 r = -EFAULT;
3245                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3246                         break;
3247                 if (irq_state.len == 0) {
3248                         r = -EINVAL;
3249                         break;
3250                 }
3251                 r = kvm_s390_get_irq_state(vcpu,
3252                                            (__u8 __user *)  irq_state.buf,
3253                                            irq_state.len);
3254                 break;
3255         }
3256         default:
3257                 r = -ENOTTY;
3258         }
3259         return r;
3260 }
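
/*
 * Illustrative userspace sketch (assumption, not part of this file): saving
 * the pending local interrupts via KVM_S390_GET_IRQ_STATE, matching the
 * length checks above.  The buffer size is an arbitrary example; on success
 * the ioctl returns the number of bytes written, and -ENOBUFS indicates the
 * buffer was too small.
 *
 *      struct kvm_s390_irq irqs[64];
 *      struct kvm_s390_irq_state irq_state = {
 *              .buf = (__u64)(unsigned long)irqs,
 *              .len = sizeof(irqs),
 *      };
 *      int n = ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &irq_state);
 */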
3261
3262 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3263 {
3264 #ifdef CONFIG_KVM_S390_UCONTROL
3265         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3266                  && (kvm_is_ucontrol(vcpu->kvm))) {
3267                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3268                 get_page(vmf->page);
3269                 return 0;
3270         }
3271 #endif
3272         return VM_FAULT_SIGBUS;
3273 }
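
/*
 * Illustrative userspace sketch (assumption, not part of this file): for
 * user-controlled ("ucontrol") VMs, the fault handler above lets userspace
 * map the SIE control block by mmap()ing the VCPU fd at page offset
 * KVM_S390_SIE_PAGE_OFFSET.
 *
 *      long page_size = sysconf(_SC_PAGESIZE);
 *      void *sie_block = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
 *                             MAP_SHARED, vcpu_fd,
 *                             KVM_S390_SIE_PAGE_OFFSET * page_size);
 */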
3274
3275 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3276                             unsigned long npages)
3277 {
3278         return 0;
3279 }
3280
3281 /* Section: memory related */
3282 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3283                                    struct kvm_memory_slot *memslot,
3284                                    const struct kvm_userspace_memory_region *mem,
3285                                    enum kvm_mr_change change)
3286 {
3287         /* A few sanity checks. Memory slots have to start and end at a
3288            segment boundary (1 MB). The memory in userland may be fragmented
3289            into multiple vmas, and it is fine to mmap() and munmap() parts of
3290            this slot at any time after this call. */
3291
3292         if (mem->userspace_addr & 0xffffful)
3293                 return -EINVAL;
3294
3295         if (mem->memory_size & 0xffffful)
3296                 return -EINVAL;
3297
3298         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3299                 return -EINVAL;
3300
3301         return 0;
3302 }
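
/*
 * Illustrative userspace sketch (assumption, not part of this file): a memory
 * slot that passes the checks above.  Both the size and the backing userspace
 * address are 1 MB aligned, and the guest physical range stays below the
 * configured memory limit.  "backing" is assumed to be a 1 MB aligned mapping
 * of at least 256 MB (e.g. from mmap with MAP_ANONYMOUS | MAP_NORESERVE).
 *
 *      struct kvm_userspace_memory_region region = {
 *              .slot            = 0,
 *              .guest_phys_addr = 0,
 *              .memory_size     = 256UL << 20,
 *              .userspace_addr  = (__u64)(unsigned long)backing,
 *      };
 *      if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region) < 0)
 *              perror("KVM_SET_USER_MEMORY_REGION");
 */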
3303
3304 void kvm_arch_commit_memory_region(struct kvm *kvm,
3305                                 const struct kvm_userspace_memory_region *mem,
3306                                 const struct kvm_memory_slot *old,
3307                                 const struct kvm_memory_slot *new,
3308                                 enum kvm_mr_change change)
3309 {
3310         int rc;
3311
3312         /* If the basics of the memslot do not change, we do not want
3313          * to update the gmap. Every update causes several unnecessary
3314          * segment translation exceptions. This is usually handled just
3315          * fine by the normal fault handler + gmap, but it will also
3316          * cause faults on the prefix page of running guest CPUs.
3317          */
3318         if (old->userspace_addr == mem->userspace_addr &&
3319             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3320             old->npages * PAGE_SIZE == mem->memory_size)
3321                 return;
3322
3323         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3324                 mem->guest_phys_addr, mem->memory_size);
3325         if (rc)
3326                 pr_warn("failed to commit memory region\n");
3327         return;
3328 }
3329
3330 static inline unsigned long nonhyp_mask(int i)
3331 {
3332         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3333
3334         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3335 }
3336
3337 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3338 {
3339         vcpu->valid_wakeup = false;
3340 }
3341
3342 static int __init kvm_s390_init(void)
3343 {
3344         int i;
3345
3346         if (!sclp.has_sief2) {
3347                 pr_info("SIE not available\n");
3348                 return -ENODEV;
3349         }
3350
3351         for (i = 0; i < 16; i++)
3352                 kvm_s390_fac_list_mask[i] |=
3353                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3354
3355         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3356 }
3357
3358 static void __exit kvm_s390_exit(void)
3359 {
3360         kvm_exit();
3361 }
3362
3363 module_init(kvm_s390_init);
3364 module_exit(kvm_s390_exit);
3365
3366 /*
3367  * Enable autoloading of the kvm module.
3368  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3369  * since x86 takes a different approach.
3370  */
3371 #include <linux/miscdevice.h>
3372 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3373 MODULE_ALIAS("devname:kvm");