1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33
34 #include <asm/asm-offsets.h>
35 #include <asm/lowcore.h>
36 #include <asm/stp.h>
37 #include <asm/pgtable.h>
38 #include <asm/gmap.h>
39 #include <asm/nmi.h>
40 #include <asm/switch_to.h>
41 #include <asm/isc.h>
42 #include <asm/sclp.h>
43 #include <asm/cpacf.h>
44 #include <asm/timex.h>
45 #include "kvm-s390.h"
46 #include "gaccess.h"
47
48 #define KMSG_COMPONENT "kvm-s390"
49 #undef pr_fmt
50 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
51
52 #define CREATE_TRACE_POINTS
53 #include "trace.h"
54 #include "trace-s390.h"
55
56 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
57 #define LOCAL_IRQS 32
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59                            (KVM_MAX_VCPUS + LOCAL_IRQS))
60
61 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
62
63 struct kvm_stats_debugfs_item debugfs_entries[] = {
64         { "userspace_handled", VCPU_STAT(exit_userspace) },
65         { "exit_null", VCPU_STAT(exit_null) },
66         { "exit_validity", VCPU_STAT(exit_validity) },
67         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
68         { "exit_external_request", VCPU_STAT(exit_external_request) },
69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70         { "exit_instruction", VCPU_STAT(exit_instruction) },
71         { "exit_pei", VCPU_STAT(exit_pei) },
72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
79         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
81         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
82         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
83         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
84         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
85         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
86         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
87         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
88         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
89         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
90         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
91         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
92         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
93         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
94         { "instruction_spx", VCPU_STAT(instruction_spx) },
95         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
96         { "instruction_stap", VCPU_STAT(instruction_stap) },
97         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
98         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
99         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
100         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
101         { "instruction_essa", VCPU_STAT(instruction_essa) },
102         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
103         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
104         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
105         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
106         { "instruction_sie", VCPU_STAT(instruction_sie) },
107         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
108         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
109         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
110         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
111         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
112         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
113         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
114         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
115         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
116         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
117         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
118         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
119         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
120         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
121         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
122         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
123         { "diagnose_10", VCPU_STAT(diagnose_10) },
124         { "diagnose_44", VCPU_STAT(diagnose_44) },
125         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
126         { "diagnose_258", VCPU_STAT(diagnose_258) },
127         { "diagnose_308", VCPU_STAT(diagnose_308) },
128         { "diagnose_500", VCPU_STAT(diagnose_500) },
129         { NULL }
130 };
131
132 /* allow nested virtualization in KVM (if enabled by user space) */
133 static int nested;
134 module_param(nested, int, S_IRUGO);
135 MODULE_PARM_DESC(nested, "Nested virtualization support");
136
137 /* upper facilities limit for kvm */
138 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
139
140 unsigned long kvm_s390_fac_list_mask_size(void)
141 {
142         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
143         return ARRAY_SIZE(kvm_s390_fac_list_mask);
144 }
145
146 /* available cpu features supported by kvm */
147 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
148 /* available subfunctions indicated via query / "test bit" */
149 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
150
151 static struct gmap_notifier gmap_notifier;
152 static struct gmap_notifier vsie_gmap_notifier;
153 debug_info_t *kvm_s390_dbf;
154
155 /* Section: not file related */
156 int kvm_arch_hardware_enable(void)
157 {
158         /* every s390 is virtualization enabled ;-) */
159         return 0;
160 }
161
162 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
163                               unsigned long end);
164
165 /*
166  * This callback is executed during stop_machine(). All CPUs are therefore
167  * temporarily stopped. In order not to change guest behavior, we have to
168  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
169  * so a CPU won't be stopped while calculating with the epoch.
170  */
171 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
172                           void *v)
173 {
174         struct kvm *kvm;
175         struct kvm_vcpu *vcpu;
176         int i;
177         unsigned long long *delta = v;
178
179         list_for_each_entry(kvm, &vm_list, vm_list) {
180                 kvm->arch.epoch -= *delta;
181                 kvm_for_each_vcpu(i, vcpu, kvm) {
182                         vcpu->arch.sie_block->epoch -= *delta;
183                         if (vcpu->arch.cputm_enabled)
184                                 vcpu->arch.cputm_start += *delta;
185                         if (vcpu->arch.vsie_block)
186                                 vcpu->arch.vsie_block->epoch -= *delta;
187                 }
188         }
189         return NOTIFY_OK;
190 }
191
192 static struct notifier_block kvm_clock_notifier = {
193         .notifier_call = kvm_clock_sync,
194 };
195
196 int kvm_arch_hardware_setup(void)
197 {
198         gmap_notifier.notifier_call = kvm_gmap_notifier;
199         gmap_register_pte_notifier(&gmap_notifier);
200         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
201         gmap_register_pte_notifier(&vsie_gmap_notifier);
202         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
203                                        &kvm_clock_notifier);
204         return 0;
205 }
206
207 void kvm_arch_hardware_unsetup(void)
208 {
209         gmap_unregister_pte_notifier(&gmap_notifier);
210         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
211         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
212                                          &kvm_clock_notifier);
213 }
214
215 static void allow_cpu_feat(unsigned long nr)
216 {
217         set_bit_inv(nr, kvm_s390_available_cpu_feat);
218 }
219
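/*
 * Probe a single PERFORM LOCKED OPERATION function code: with the test
 * bit (0x100) set in r0, PLO only reports via the condition code whether
 * the given function code is installed (cc == 0), without performing an
 * actual locked operation.
 */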
220 static inline int plo_test_bit(unsigned char nr)
221 {
222         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
223         int cc;
224
225         asm volatile(
226                 /* Parameter registers are ignored for "test bit" */
227                 "       plo     0,0,0,0(0)\n"
228                 "       ipm     %0\n"
229                 "       srl     %0,28\n"
230                 : "=d" (cc)
231                 : "d" (r0)
232                 : "cc");
233         return cc == 0;
234 }
235
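/*
 * Probe which PLO subfunctions, PTFF functions and CPACF query masks the
 * host provides and derive the CPU features that can be offered to
 * guests. The SIE features needed for vSIE are only advertised when the
 * "nested" module parameter is set and the required facilities exist.
 */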
236 static void kvm_s390_cpu_feat_init(void)
237 {
238         int i;
239
240         for (i = 0; i < 256; ++i) {
241                 if (plo_test_bit(i))
242                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
243         }
244
245         if (test_facility(28)) /* TOD-clock steering */
246                 ptff(kvm_s390_available_subfunc.ptff,
247                      sizeof(kvm_s390_available_subfunc.ptff),
248                      PTFF_QAF);
249
250         if (test_facility(17)) { /* MSA */
251                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
252                               kvm_s390_available_subfunc.kmac);
253                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
254                               kvm_s390_available_subfunc.kmc);
255                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
256                               kvm_s390_available_subfunc.km);
257                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
258                               kvm_s390_available_subfunc.kimd);
259                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
260                               kvm_s390_available_subfunc.klmd);
261         }
262         if (test_facility(76)) /* MSA3 */
263                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
264                               kvm_s390_available_subfunc.pckmo);
265         if (test_facility(77)) { /* MSA4 */
266                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
267                               kvm_s390_available_subfunc.kmctr);
268                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
269                               kvm_s390_available_subfunc.kmf);
270                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
271                               kvm_s390_available_subfunc.kmo);
272                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
273                               kvm_s390_available_subfunc.pcc);
274         }
275         if (test_facility(57)) /* MSA5 */
276                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
277                               kvm_s390_available_subfunc.ppno);
278
279         if (MACHINE_HAS_ESOP)
280                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
281         /*
282          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
283          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
284          */
285         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
286             !test_facility(3) || !nested)
287                 return;
288         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
289         if (sclp.has_64bscao)
290                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
291         if (sclp.has_siif)
292                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
293         if (sclp.has_gpere)
294                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
295         if (sclp.has_gsls)
296                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
297         if (sclp.has_ib)
298                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
299         if (sclp.has_cei)
300                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
301         if (sclp.has_ibs)
302                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
303         /*
304          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
305          * all skey handling functions read/set the skey from the PGSTE
306          * instead of the real storage key.
307          *
308          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
309          * pages being detected as preserved although they are resident.
310          *
311          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
312          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
313          *
314          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
315          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
316          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
317          *
318          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
319          * cannot easily shadow the SCA because of the ipte lock.
320          */
321 }
322
323 int kvm_arch_init(void *opaque)
324 {
325         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
326         if (!kvm_s390_dbf)
327                 return -ENOMEM;
328
329         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
330                 debug_unregister(kvm_s390_dbf);
331                 return -ENOMEM;
332         }
333
334         kvm_s390_cpu_feat_init();
335
336         /* Register floating interrupt controller interface. */
337         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
338 }
339
340 void kvm_arch_exit(void)
341 {
342         debug_unregister(kvm_s390_dbf);
343 }
344
345 /* Section: device related */
346 long kvm_arch_dev_ioctl(struct file *filp,
347                         unsigned int ioctl, unsigned long arg)
348 {
349         if (ioctl == KVM_S390_ENABLE_SIE)
350                 return s390_enable_sie();
351         return -EINVAL;
352 }
353
354 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
355 {
356         int r;
357
358         switch (ext) {
359         case KVM_CAP_S390_PSW:
360         case KVM_CAP_S390_GMAP:
361         case KVM_CAP_SYNC_MMU:
362 #ifdef CONFIG_KVM_S390_UCONTROL
363         case KVM_CAP_S390_UCONTROL:
364 #endif
365         case KVM_CAP_ASYNC_PF:
366         case KVM_CAP_SYNC_REGS:
367         case KVM_CAP_ONE_REG:
368         case KVM_CAP_ENABLE_CAP:
369         case KVM_CAP_S390_CSS_SUPPORT:
370         case KVM_CAP_IOEVENTFD:
371         case KVM_CAP_DEVICE_CTRL:
372         case KVM_CAP_ENABLE_CAP_VM:
373         case KVM_CAP_S390_IRQCHIP:
374         case KVM_CAP_VM_ATTRIBUTES:
375         case KVM_CAP_MP_STATE:
376         case KVM_CAP_IMMEDIATE_EXIT:
377         case KVM_CAP_S390_INJECT_IRQ:
378         case KVM_CAP_S390_USER_SIGP:
379         case KVM_CAP_S390_USER_STSI:
380         case KVM_CAP_S390_SKEYS:
381         case KVM_CAP_S390_IRQ_STATE:
382         case KVM_CAP_S390_USER_INSTR0:
383                 r = 1;
384                 break;
385         case KVM_CAP_S390_MEM_OP:
386                 r = MEM_OP_MAX_SIZE;
387                 break;
388         case KVM_CAP_NR_VCPUS:
389         case KVM_CAP_MAX_VCPUS:
390                 r = KVM_S390_BSCA_CPU_SLOTS;
391                 if (!kvm_s390_use_sca_entries())
392                         r = KVM_MAX_VCPUS;
393                 else if (sclp.has_esca && sclp.has_64bscao)
394                         r = KVM_S390_ESCA_CPU_SLOTS;
395                 break;
396         case KVM_CAP_NR_MEMSLOTS:
397                 r = KVM_USER_MEM_SLOTS;
398                 break;
399         case KVM_CAP_S390_COW:
400                 r = MACHINE_HAS_ESOP;
401                 break;
402         case KVM_CAP_S390_VECTOR_REGISTERS:
403                 r = MACHINE_HAS_VX;
404                 break;
405         case KVM_CAP_S390_RI:
406                 r = test_facility(64);
407                 break;
408         default:
409                 r = 0;
410         }
411         return r;
412 }
413
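/*
 * Walk all pages of a memory slot and transfer the per-page dirty state
 * from the gmap into the KVM dirty bitmap, rescheduling regularly and
 * bailing out early if a fatal signal is pending.
 */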
414 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
415                                         struct kvm_memory_slot *memslot)
416 {
417         gfn_t cur_gfn, last_gfn;
418         unsigned long address;
419         struct gmap *gmap = kvm->arch.gmap;
420
421         /* Loop over all guest pages */
422         last_gfn = memslot->base_gfn + memslot->npages;
423         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
424                 address = gfn_to_hva_memslot(memslot, cur_gfn);
425
426                 if (test_and_clear_guest_dirty(gmap->mm, address))
427                         mark_page_dirty(kvm, cur_gfn);
428                 if (fatal_signal_pending(current))
429                         return;
430                 cond_resched();
431         }
432 }
433
434 /* Section: vm related */
435 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
436
437 /*
438  * Get (and clear) the dirty memory log for a memory slot.
439  */
440 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
441                                struct kvm_dirty_log *log)
442 {
443         int r;
444         unsigned long n;
445         struct kvm_memslots *slots;
446         struct kvm_memory_slot *memslot;
447         int is_dirty = 0;
448
449         if (kvm_is_ucontrol(kvm))
450                 return -EINVAL;
451
452         mutex_lock(&kvm->slots_lock);
453
454         r = -EINVAL;
455         if (log->slot >= KVM_USER_MEM_SLOTS)
456                 goto out;
457
458         slots = kvm_memslots(kvm);
459         memslot = id_to_memslot(slots, log->slot);
460         r = -ENOENT;
461         if (!memslot->dirty_bitmap)
462                 goto out;
463
464         kvm_s390_sync_dirty_log(kvm, memslot);
465         r = kvm_get_dirty_log(kvm, log, &is_dirty);
466         if (r)
467                 goto out;
468
469         /* Clear the dirty log */
470         if (is_dirty) {
471                 n = kvm_dirty_bitmap_bytes(memslot);
472                 memset(memslot->dirty_bitmap, 0, n);
473         }
474         r = 0;
475 out:
476         mutex_unlock(&kvm->slots_lock);
477         return r;
478 }
479
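/* Request an operation exception intercept on every vcpu of this VM. */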
480 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
481 {
482         unsigned int i;
483         struct kvm_vcpu *vcpu;
484
485         kvm_for_each_vcpu(i, vcpu, kvm) {
486                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
487         }
488 }
489
490 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
491 {
492         int r;
493
494         if (cap->flags)
495                 return -EINVAL;
496
497         switch (cap->cap) {
498         case KVM_CAP_S390_IRQCHIP:
499                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
500                 kvm->arch.use_irqchip = 1;
501                 r = 0;
502                 break;
503         case KVM_CAP_S390_USER_SIGP:
504                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
505                 kvm->arch.user_sigp = 1;
506                 r = 0;
507                 break;
508         case KVM_CAP_S390_VECTOR_REGISTERS:
509                 mutex_lock(&kvm->lock);
510                 if (kvm->created_vcpus) {
511                         r = -EBUSY;
512                 } else if (MACHINE_HAS_VX) {
513                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
514                         set_kvm_facility(kvm->arch.model.fac_list, 129);
515                         if (test_facility(134)) {
516                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
517                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
518                         }
519                         if (test_facility(135)) {
520                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
521                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
522                         }
523                         r = 0;
524                 } else
525                         r = -EINVAL;
526                 mutex_unlock(&kvm->lock);
527                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
528                          r ? "(not available)" : "(success)");
529                 break;
530         case KVM_CAP_S390_RI:
531                 r = -EINVAL;
532                 mutex_lock(&kvm->lock);
533                 if (kvm->created_vcpus) {
534                         r = -EBUSY;
535                 } else if (test_facility(64)) {
536                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
537                         set_kvm_facility(kvm->arch.model.fac_list, 64);
538                         r = 0;
539                 }
540                 mutex_unlock(&kvm->lock);
541                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
542                          r ? "(not available)" : "(success)");
543                 break;
544         case KVM_CAP_S390_USER_STSI:
545                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
546                 kvm->arch.user_stsi = 1;
547                 r = 0;
548                 break;
549         case KVM_CAP_S390_USER_INSTR0:
550                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
551                 kvm->arch.user_instr0 = 1;
552                 icpt_operexc_on_all_vcpus(kvm);
553                 r = 0;
554                 break;
555         default:
556                 r = -EINVAL;
557                 break;
558         }
559         return r;
560 }
561
562 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
563 {
564         int ret;
565
566         switch (attr->attr) {
567         case KVM_S390_VM_MEM_LIMIT_SIZE:
568                 ret = 0;
569                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
570                          kvm->arch.mem_limit);
571                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
572                         ret = -EFAULT;
573                 break;
574         default:
575                 ret = -ENXIO;
576                 break;
577         }
578         return ret;
579 }
580
581 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
582 {
583         int ret;
584         unsigned int idx;
585         switch (attr->attr) {
586         case KVM_S390_VM_MEM_ENABLE_CMMA:
587                 ret = -ENXIO;
588                 if (!sclp.has_cmma)
589                         break;
590
591                 ret = -EBUSY;
592                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
593                 mutex_lock(&kvm->lock);
594                 if (!kvm->created_vcpus) {
595                         kvm->arch.use_cmma = 1;
596                         ret = 0;
597                 }
598                 mutex_unlock(&kvm->lock);
599                 break;
600         case KVM_S390_VM_MEM_CLR_CMMA:
601                 ret = -ENXIO;
602                 if (!sclp.has_cmma)
603                         break;
604                 ret = -EINVAL;
605                 if (!kvm->arch.use_cmma)
606                         break;
607
608                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
609                 mutex_lock(&kvm->lock);
610                 idx = srcu_read_lock(&kvm->srcu);
611                 s390_reset_cmma(kvm->arch.gmap->mm);
612                 srcu_read_unlock(&kvm->srcu, idx);
613                 mutex_unlock(&kvm->lock);
614                 ret = 0;
615                 break;
616         case KVM_S390_VM_MEM_LIMIT_SIZE: {
617                 unsigned long new_limit;
618
619                 if (kvm_is_ucontrol(kvm))
620                         return -EINVAL;
621
622                 if (get_user(new_limit, (u64 __user *)attr->addr))
623                         return -EFAULT;
624
625                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
626                     new_limit > kvm->arch.mem_limit)
627                         return -E2BIG;
628
629                 if (!new_limit)
630                         return -EINVAL;
631
632                 /* gmap_create takes last usable address */
633                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
634                         new_limit -= 1;
635
636                 ret = -EBUSY;
637                 mutex_lock(&kvm->lock);
638                 if (!kvm->created_vcpus) {
639                         /* gmap_create will round the limit up */
640                         struct gmap *new = gmap_create(current->mm, new_limit);
641
642                         if (!new) {
643                                 ret = -ENOMEM;
644                         } else {
645                                 gmap_remove(kvm->arch.gmap);
646                                 new->private = kvm;
647                                 kvm->arch.gmap = new;
648                                 ret = 0;
649                         }
650                 }
651                 mutex_unlock(&kvm->lock);
652                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
653                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
654                          (void *) kvm->arch.gmap->asce);
655                 break;
656         }
657         default:
658                 ret = -ENXIO;
659                 break;
660         }
661         return ret;
662 }
663
664 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
665
666 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
667 {
668         struct kvm_vcpu *vcpu;
669         int i;
670
671         if (!test_kvm_facility(kvm, 76))
672                 return -EINVAL;
673
674         mutex_lock(&kvm->lock);
675         switch (attr->attr) {
676         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
677                 get_random_bytes(
678                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
679                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
680                 kvm->arch.crypto.aes_kw = 1;
681                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
682                 break;
683         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
684                 get_random_bytes(
685                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
686                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
687                 kvm->arch.crypto.dea_kw = 1;
688                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
689                 break;
690         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
691                 kvm->arch.crypto.aes_kw = 0;
692                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
693                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
694                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
695                 break;
696         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
697                 kvm->arch.crypto.dea_kw = 0;
698                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
699                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
700                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
701                 break;
702         default:
703                 mutex_unlock(&kvm->lock);
704                 return -ENXIO;
705         }
706
707         kvm_for_each_vcpu(i, vcpu, kvm) {
708                 kvm_s390_vcpu_crypto_setup(vcpu);
709                 exit_sie(vcpu);
710         }
711         mutex_unlock(&kvm->lock);
712         return 0;
713 }
714
715 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
716 {
717         u8 gtod_high;
718
719         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
720                                            sizeof(gtod_high)))
721                 return -EFAULT;
722
723         if (gtod_high != 0)
724                 return -EINVAL;
725         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
726
727         return 0;
728 }
729
730 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
731 {
732         u64 gtod;
733
734         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
735                 return -EFAULT;
736
737         kvm_s390_set_tod_clock(kvm, gtod);
738         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
739         return 0;
740 }
741
742 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
743 {
744         int ret;
745
746         if (attr->flags)
747                 return -EINVAL;
748
749         switch (attr->attr) {
750         case KVM_S390_VM_TOD_HIGH:
751                 ret = kvm_s390_set_tod_high(kvm, attr);
752                 break;
753         case KVM_S390_VM_TOD_LOW:
754                 ret = kvm_s390_set_tod_low(kvm, attr);
755                 break;
756         default:
757                 ret = -ENXIO;
758                 break;
759         }
760         return ret;
761 }
762
763 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
764 {
765         u8 gtod_high = 0;
766
767         if (copy_to_user((void __user *)attr->addr, &gtod_high,
768                                          sizeof(gtod_high)))
769                 return -EFAULT;
770         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
771
772         return 0;
773 }
774
775 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
776 {
777         u64 gtod;
778
779         gtod = kvm_s390_get_tod_clock_fast(kvm);
780         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
781                 return -EFAULT;
782         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
783
784         return 0;
785 }
786
787 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
788 {
789         int ret;
790
791         if (attr->flags)
792                 return -EINVAL;
793
794         switch (attr->attr) {
795         case KVM_S390_VM_TOD_HIGH:
796                 ret = kvm_s390_get_tod_high(kvm, attr);
797                 break;
798         case KVM_S390_VM_TOD_LOW:
799                 ret = kvm_s390_get_tod_low(kvm, attr);
800                 break;
801         default:
802                 ret = -ENXIO;
803                 break;
804         }
805         return ret;
806 }
807
808 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
809 {
810         struct kvm_s390_vm_cpu_processor *proc;
811         u16 lowest_ibc, unblocked_ibc;
812         int ret = 0;
813
814         mutex_lock(&kvm->lock);
815         if (kvm->created_vcpus) {
816                 ret = -EBUSY;
817                 goto out;
818         }
819         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
820         if (!proc) {
821                 ret = -ENOMEM;
822                 goto out;
823         }
824         if (!copy_from_user(proc, (void __user *)attr->addr,
825                             sizeof(*proc))) {
826                 kvm->arch.model.cpuid = proc->cpuid;
827                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
828                 unblocked_ibc = sclp.ibc & 0xfff;
829                 if (lowest_ibc && proc->ibc) {
830                         if (proc->ibc > unblocked_ibc)
831                                 kvm->arch.model.ibc = unblocked_ibc;
832                         else if (proc->ibc < lowest_ibc)
833                                 kvm->arch.model.ibc = lowest_ibc;
834                         else
835                                 kvm->arch.model.ibc = proc->ibc;
836                 }
837                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
838                        S390_ARCH_FAC_LIST_SIZE_BYTE);
839                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
840                          kvm->arch.model.ibc,
841                          kvm->arch.model.cpuid);
842                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
843                          kvm->arch.model.fac_list[0],
844                          kvm->arch.model.fac_list[1],
845                          kvm->arch.model.fac_list[2]);
846         } else
847                 ret = -EFAULT;
848         kfree(proc);
849 out:
850         mutex_unlock(&kvm->lock);
851         return ret;
852 }
853
854 static int kvm_s390_set_processor_feat(struct kvm *kvm,
855                                        struct kvm_device_attr *attr)
856 {
857         struct kvm_s390_vm_cpu_feat data;
858         int ret = -EBUSY;
859
860         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
861                 return -EFAULT;
862         if (!bitmap_subset((unsigned long *) data.feat,
863                            kvm_s390_available_cpu_feat,
864                            KVM_S390_VM_CPU_FEAT_NR_BITS))
865                 return -EINVAL;
866
867         mutex_lock(&kvm->lock);
868         if (!atomic_read(&kvm->online_vcpus)) {
869                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
870                             KVM_S390_VM_CPU_FEAT_NR_BITS);
871                 ret = 0;
872         }
873         mutex_unlock(&kvm->lock);
874         return ret;
875 }
876
877 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
878                                           struct kvm_device_attr *attr)
879 {
880         /*
881          * Once supported by kernel + hw, we have to store the subfunctions
882          * in kvm->arch and remember that user space configured them.
883          */
884         return -ENXIO;
885 }
886
887 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
888 {
889         int ret = -ENXIO;
890
891         switch (attr->attr) {
892         case KVM_S390_VM_CPU_PROCESSOR:
893                 ret = kvm_s390_set_processor(kvm, attr);
894                 break;
895         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
896                 ret = kvm_s390_set_processor_feat(kvm, attr);
897                 break;
898         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
899                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
900                 break;
901         }
902         return ret;
903 }
904
905 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
906 {
907         struct kvm_s390_vm_cpu_processor *proc;
908         int ret = 0;
909
910         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
911         if (!proc) {
912                 ret = -ENOMEM;
913                 goto out;
914         }
915         proc->cpuid = kvm->arch.model.cpuid;
916         proc->ibc = kvm->arch.model.ibc;
917         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
918                S390_ARCH_FAC_LIST_SIZE_BYTE);
919         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
920                  kvm->arch.model.ibc,
921                  kvm->arch.model.cpuid);
922         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
923                  kvm->arch.model.fac_list[0],
924                  kvm->arch.model.fac_list[1],
925                  kvm->arch.model.fac_list[2]);
926         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
927                 ret = -EFAULT;
928         kfree(proc);
929 out:
930         return ret;
931 }
932
933 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
934 {
935         struct kvm_s390_vm_cpu_machine *mach;
936         int ret = 0;
937
938         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
939         if (!mach) {
940                 ret = -ENOMEM;
941                 goto out;
942         }
943         get_cpu_id((struct cpuid *) &mach->cpuid);
944         mach->ibc = sclp.ibc;
945         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
946                S390_ARCH_FAC_LIST_SIZE_BYTE);
947         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
948                sizeof(S390_lowcore.stfle_fac_list));
949         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
950                  kvm->arch.model.ibc,
951                  kvm->arch.model.cpuid);
952         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
953                  mach->fac_mask[0],
954                  mach->fac_mask[1],
955                  mach->fac_mask[2]);
956         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
957                  mach->fac_list[0],
958                  mach->fac_list[1],
959                  mach->fac_list[2]);
960         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
961                 ret = -EFAULT;
962         kfree(mach);
963 out:
964         return ret;
965 }
966
967 static int kvm_s390_get_processor_feat(struct kvm *kvm,
968                                        struct kvm_device_attr *attr)
969 {
970         struct kvm_s390_vm_cpu_feat data;
971
972         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
973                     KVM_S390_VM_CPU_FEAT_NR_BITS);
974         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
975                 return -EFAULT;
976         return 0;
977 }
978
979 static int kvm_s390_get_machine_feat(struct kvm *kvm,
980                                      struct kvm_device_attr *attr)
981 {
982         struct kvm_s390_vm_cpu_feat data;
983
984         bitmap_copy((unsigned long *) data.feat,
985                     kvm_s390_available_cpu_feat,
986                     KVM_S390_VM_CPU_FEAT_NR_BITS);
987         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
988                 return -EFAULT;
989         return 0;
990 }
991
992 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
993                                           struct kvm_device_attr *attr)
994 {
995         /*
996          * Once we can actually configure subfunctions (kernel + hw support),
997          * we have to check if they were already set by user space, if so copy
998          * them from kvm->arch.
999          */
1000         return -ENXIO;
1001 }
1002
1003 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1004                                         struct kvm_device_attr *attr)
1005 {
1006         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1007             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1008                 return -EFAULT;
1009         return 0;
1010 }
1011 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1012 {
1013         int ret = -ENXIO;
1014
1015         switch (attr->attr) {
1016         case KVM_S390_VM_CPU_PROCESSOR:
1017                 ret = kvm_s390_get_processor(kvm, attr);
1018                 break;
1019         case KVM_S390_VM_CPU_MACHINE:
1020                 ret = kvm_s390_get_machine(kvm, attr);
1021                 break;
1022         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1023                 ret = kvm_s390_get_processor_feat(kvm, attr);
1024                 break;
1025         case KVM_S390_VM_CPU_MACHINE_FEAT:
1026                 ret = kvm_s390_get_machine_feat(kvm, attr);
1027                 break;
1028         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1029                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1030                 break;
1031         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1032                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1033                 break;
1034         }
1035         return ret;
1036 }
1037
1038 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1039 {
1040         int ret;
1041
1042         switch (attr->group) {
1043         case KVM_S390_VM_MEM_CTRL:
1044                 ret = kvm_s390_set_mem_control(kvm, attr);
1045                 break;
1046         case KVM_S390_VM_TOD:
1047                 ret = kvm_s390_set_tod(kvm, attr);
1048                 break;
1049         case KVM_S390_VM_CPU_MODEL:
1050                 ret = kvm_s390_set_cpu_model(kvm, attr);
1051                 break;
1052         case KVM_S390_VM_CRYPTO:
1053                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1054                 break;
1055         default:
1056                 ret = -ENXIO;
1057                 break;
1058         }
1059
1060         return ret;
1061 }
1062
1063 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1064 {
1065         int ret;
1066
1067         switch (attr->group) {
1068         case KVM_S390_VM_MEM_CTRL:
1069                 ret = kvm_s390_get_mem_control(kvm, attr);
1070                 break;
1071         case KVM_S390_VM_TOD:
1072                 ret = kvm_s390_get_tod(kvm, attr);
1073                 break;
1074         case KVM_S390_VM_CPU_MODEL:
1075                 ret = kvm_s390_get_cpu_model(kvm, attr);
1076                 break;
1077         default:
1078                 ret = -ENXIO;
1079                 break;
1080         }
1081
1082         return ret;
1083 }
1084
1085 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1086 {
1087         int ret;
1088
1089         switch (attr->group) {
1090         case KVM_S390_VM_MEM_CTRL:
1091                 switch (attr->attr) {
1092                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1093                 case KVM_S390_VM_MEM_CLR_CMMA:
1094                         ret = sclp.has_cmma ? 0 : -ENXIO;
1095                         break;
1096                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1097                         ret = 0;
1098                         break;
1099                 default:
1100                         ret = -ENXIO;
1101                         break;
1102                 }
1103                 break;
1104         case KVM_S390_VM_TOD:
1105                 switch (attr->attr) {
1106                 case KVM_S390_VM_TOD_LOW:
1107                 case KVM_S390_VM_TOD_HIGH:
1108                         ret = 0;
1109                         break;
1110                 default:
1111                         ret = -ENXIO;
1112                         break;
1113                 }
1114                 break;
1115         case KVM_S390_VM_CPU_MODEL:
1116                 switch (attr->attr) {
1117                 case KVM_S390_VM_CPU_PROCESSOR:
1118                 case KVM_S390_VM_CPU_MACHINE:
1119                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1120                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1121                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1122                         ret = 0;
1123                         break;
1124                 /* configuring subfunctions is not supported yet */
1125                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1126                 default:
1127                         ret = -ENXIO;
1128                         break;
1129                 }
1130                 break;
1131         case KVM_S390_VM_CRYPTO:
1132                 switch (attr->attr) {
1133                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1134                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1135                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1136                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1137                         ret = 0;
1138                         break;
1139                 default:
1140                         ret = -ENXIO;
1141                         break;
1142                 }
1143                 break;
1144         default:
1145                 ret = -ENXIO;
1146                 break;
1147         }
1148
1149         return ret;
1150 }
1151
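/*
 * Copy the guest storage keys for a range of guest frames to user space.
 * Returns KVM_S390_GET_SKEYS_NONE if the guest does not use storage keys.
 */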
1152 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1153 {
1154         uint8_t *keys;
1155         uint64_t hva;
1156         int i, r = 0;
1157
1158         if (args->flags != 0)
1159                 return -EINVAL;
1160
1161         /* Is this guest using storage keys? */
1162         if (!mm_use_skey(current->mm))
1163                 return KVM_S390_GET_SKEYS_NONE;
1164
1165         /* Enforce sane limit on memory allocation */
1166         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1167                 return -EINVAL;
1168
1169         keys = kmalloc_array(args->count, sizeof(uint8_t),
1170                              GFP_KERNEL | __GFP_NOWARN);
1171         if (!keys)
1172                 keys = vmalloc(sizeof(uint8_t) * args->count);
1173         if (!keys)
1174                 return -ENOMEM;
1175
1176         down_read(&current->mm->mmap_sem);
1177         for (i = 0; i < args->count; i++) {
1178                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1179                 if (kvm_is_error_hva(hva)) {
1180                         r = -EFAULT;
1181                         break;
1182                 }
1183
1184                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1185                 if (r)
1186                         break;
1187         }
1188         up_read(&current->mm->mmap_sem);
1189
1190         if (!r) {
1191                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1192                                  sizeof(uint8_t) * args->count);
1193                 if (r)
1194                         r = -EFAULT;
1195         }
1196
1197         kvfree(keys);
1198         return r;
1199 }
1200
1201 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1202 {
1203         uint8_t *keys;
1204         uint64_t hva;
1205         int i, r = 0;
1206
1207         if (args->flags != 0)
1208                 return -EINVAL;
1209
1210         /* Enforce sane limit on memory allocation */
1211         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1212                 return -EINVAL;
1213
1214         keys = kmalloc_array(args->count, sizeof(uint8_t),
1215                              GFP_KERNEL | __GFP_NOWARN);
1216         if (!keys)
1217                 keys = vmalloc(sizeof(uint8_t) * args->count);
1218         if (!keys)
1219                 return -ENOMEM;
1220
1221         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1222                            sizeof(uint8_t) * args->count);
1223         if (r) {
1224                 r = -EFAULT;
1225                 goto out;
1226         }
1227
1228         /* Enable storage key handling for the guest */
1229         r = s390_enable_skey();
1230         if (r)
1231                 goto out;
1232
1233         down_read(&current->mm->mmap_sem);
1234         for (i = 0; i < args->count; i++) {
1235                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1236                 if (kvm_is_error_hva(hva)) {
1237                         r = -EFAULT;
1238                         break;
1239                 }
1240
1241                 /* Lowest order bit is reserved */
1242                 if (keys[i] & 0x01) {
1243                         r = -EINVAL;
1244                         break;
1245                 }
1246
1247                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1248                 if (r)
1249                         break;
1250         }
1251         up_read(&current->mm->mmap_sem);
1252 out:
1253         kvfree(keys);
1254         return r;
1255 }
1256
1257 long kvm_arch_vm_ioctl(struct file *filp,
1258                        unsigned int ioctl, unsigned long arg)
1259 {
1260         struct kvm *kvm = filp->private_data;
1261         void __user *argp = (void __user *)arg;
1262         struct kvm_device_attr attr;
1263         int r;
1264
1265         switch (ioctl) {
1266         case KVM_S390_INTERRUPT: {
1267                 struct kvm_s390_interrupt s390int;
1268
1269                 r = -EFAULT;
1270                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1271                         break;
1272                 r = kvm_s390_inject_vm(kvm, &s390int);
1273                 break;
1274         }
1275         case KVM_ENABLE_CAP: {
1276                 struct kvm_enable_cap cap;
1277                 r = -EFAULT;
1278                 if (copy_from_user(&cap, argp, sizeof(cap)))
1279                         break;
1280                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1281                 break;
1282         }
1283         case KVM_CREATE_IRQCHIP: {
1284                 struct kvm_irq_routing_entry routing;
1285
1286                 r = -EINVAL;
1287                 if (kvm->arch.use_irqchip) {
1288                         /* Set up dummy routing. */
1289                         memset(&routing, 0, sizeof(routing));
1290                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1291                 }
1292                 break;
1293         }
1294         case KVM_SET_DEVICE_ATTR: {
1295                 r = -EFAULT;
1296                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1297                         break;
1298                 r = kvm_s390_vm_set_attr(kvm, &attr);
1299                 break;
1300         }
1301         case KVM_GET_DEVICE_ATTR: {
1302                 r = -EFAULT;
1303                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1304                         break;
1305                 r = kvm_s390_vm_get_attr(kvm, &attr);
1306                 break;
1307         }
1308         case KVM_HAS_DEVICE_ATTR: {
1309                 r = -EFAULT;
1310                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1311                         break;
1312                 r = kvm_s390_vm_has_attr(kvm, &attr);
1313                 break;
1314         }
1315         case KVM_S390_GET_SKEYS: {
1316                 struct kvm_s390_skeys args;
1317
1318                 r = -EFAULT;
1319                 if (copy_from_user(&args, argp,
1320                                    sizeof(struct kvm_s390_skeys)))
1321                         break;
1322                 r = kvm_s390_get_skeys(kvm, &args);
1323                 break;
1324         }
1325         case KVM_S390_SET_SKEYS: {
1326                 struct kvm_s390_skeys args;
1327
1328                 r = -EFAULT;
1329                 if (copy_from_user(&args, argp,
1330                                    sizeof(struct kvm_s390_skeys)))
1331                         break;
1332                 r = kvm_s390_set_skeys(kvm, &args);
1333                 break;
1334         }
1335         default:
1336                 r = -ENOTTY;
1337         }
1338
1339         return r;
1340 }
1341
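/*
 * Issue PQAP(QCI) to retrieve the AP configuration into the 128 byte
 * buffer at @config and return the resulting condition code.
 */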
1342 static int kvm_s390_query_ap_config(u8 *config)
1343 {
1344         u32 fcn_code = 0x04000000UL;
1345         u32 cc = 0;
1346
1347         memset(config, 0, 128);
1348         asm volatile(
1349                 "lgr 0,%1\n"
1350                 "lgr 2,%2\n"
1351                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1352                 "0: ipm %0\n"
1353                 "srl %0,28\n"
1354                 "1:\n"
1355                 EX_TABLE(0b, 1b)
1356                 : "+r" (cc)
1357                 : "r" (fcn_code), "r" (config)
1358                 : "cc", "0", "2", "memory"
1359         );
1360
1361         return cc;
1362 }
1363
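/*
 * Check whether the AP extended addressing (APXA) facility is installed
 * by looking at the QCI result, provided facility 12 is available.
 */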
1364 static int kvm_s390_apxa_installed(void)
1365 {
1366         u8 config[128];
1367         int cc;
1368
1369         if (test_facility(12)) {
1370                 cc = kvm_s390_query_ap_config(config);
1371
1372                 if (cc)
1373                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1374                 else
1375                         return config[0] & 0x40;
1376         }
1377
1378         return 0;
1379 }
1380
1381 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1382 {
1383         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1384
1385         if (kvm_s390_apxa_installed())
1386                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1387         else
1388                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1389 }
1390
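/*
 * Report the host cpuid as the initial guest cpuid, with the version
 * field forced to 0xff.
 */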
1391 static u64 kvm_s390_get_initial_cpuid(void)
1392 {
1393         struct cpuid cpuid;
1394
1395         get_cpu_id(&cpuid);
1396         cpuid.version = 0xff;
1397         return *((u64 *) &cpuid);
1398 }
1399
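/*
 * Set up the crypto control block (format depends on APXA) and enable
 * AES and DEA protected-key functions with freshly generated wrapping
 * key masks, provided the guest has facility 76 (MSA3).
 */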
1400 static void kvm_s390_crypto_init(struct kvm *kvm)
1401 {
1402         if (!test_kvm_facility(kvm, 76))
1403                 return;
1404
1405         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1406         kvm_s390_set_crycb_format(kvm);
1407
1408         /* Enable AES/DEA protected key functions by default */
1409         kvm->arch.crypto.aes_kw = 1;
1410         kvm->arch.crypto.dea_kw = 1;
1411         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1412                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1413         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1414                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1415 }
1416
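/* Free the SCA (extended or basic format) and clear the pointer. */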
1417 static void sca_dispose(struct kvm *kvm)
1418 {
1419         if (kvm->arch.use_esca)
1420                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1421         else
1422                 free_page((unsigned long)(kvm->arch.sca));
1423         kvm->arch.sca = NULL;
1424 }
1425
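/*
 * Create the architecture specific parts of a VM: allocate the basic SCA
 * (staggering its offset within the page), the debug feature and the
 * sie_page2 with the facility lists, initialize the CPU model, crypto and
 * floating interrupt state, and create the gmap guest address space
 * unless this is a ucontrol VM.
 */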
1426 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1427 {
1428         gfp_t alloc_flags = GFP_KERNEL;
1429         int i, rc;
1430         char debug_name[16];
1431         static unsigned long sca_offset;
1432
1433         rc = -EINVAL;
1434 #ifdef CONFIG_KVM_S390_UCONTROL
1435         if (type & ~KVM_VM_S390_UCONTROL)
1436                 goto out_err;
1437         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1438                 goto out_err;
1439 #else
1440         if (type)
1441                 goto out_err;
1442 #endif
1443
1444         rc = s390_enable_sie();
1445         if (rc)
1446                 goto out_err;
1447
1448         rc = -ENOMEM;
1449
1450         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1451
1452         kvm->arch.use_esca = 0; /* start with basic SCA */
1453         if (!sclp.has_64bscao)
1454                 alloc_flags |= GFP_DMA;
1455         rwlock_init(&kvm->arch.sca_lock);
1456         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1457         if (!kvm->arch.sca)
1458                 goto out_err;
1459         spin_lock(&kvm_lock);
1460         sca_offset += 16;
1461         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1462                 sca_offset = 0;
1463         kvm->arch.sca = (struct bsca_block *)
1464                         ((char *) kvm->arch.sca + sca_offset);
1465         spin_unlock(&kvm_lock);
1466
1467         sprintf(debug_name, "kvm-%u", current->pid);
1468
1469         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1470         if (!kvm->arch.dbf)
1471                 goto out_err;
1472
1473         kvm->arch.sie_page2 =
1474              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1475         if (!kvm->arch.sie_page2)
1476                 goto out_err;
1477
1478         /* Populate the facility mask initially. */
1479         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1480                sizeof(S390_lowcore.stfle_fac_list));
1481         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1482                 if (i < kvm_s390_fac_list_mask_size())
1483                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1484                 else
1485                         kvm->arch.model.fac_mask[i] = 0UL;
1486         }
1487
1488         /* Populate the facility list initially. */
1489         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1490         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1491                S390_ARCH_FAC_LIST_SIZE_BYTE);
1492
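             /* KVM emulates STHYI itself, so facility 74 is offered unconditionally */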
1493         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1494         set_kvm_facility(kvm->arch.model.fac_list, 74);
1495
1496         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1497         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1498
1499         kvm_s390_crypto_init(kvm);
1500
1501         spin_lock_init(&kvm->arch.float_int.lock);
1502         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1503                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1504         init_waitqueue_head(&kvm->arch.ipte_wq);
1505         mutex_init(&kvm->arch.ipte_mutex);
1506
1507         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1508         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1509
1510         if (type & KVM_VM_S390_UCONTROL) {
1511                 kvm->arch.gmap = NULL;
1512                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1513         } else {
1514                 if (sclp.hamax == U64_MAX)
1515                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1516                 else
1517                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1518                                                     sclp.hamax + 1);
1519                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1520                 if (!kvm->arch.gmap)
1521                         goto out_err;
1522                 kvm->arch.gmap->private = kvm;
1523                 kvm->arch.gmap->pfault_enabled = 0;
1524         }
1525
1526         kvm->arch.css_support = 0;
1527         kvm->arch.use_irqchip = 0;
1528         kvm->arch.epoch = 0;
1529
1530         spin_lock_init(&kvm->arch.start_stop_lock);
1531         kvm_s390_vsie_init(kvm);
1532         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1533
1534         return 0;
1535 out_err:
1536         free_page((unsigned long)kvm->arch.sie_page2);
1537         debug_unregister(kvm->arch.dbf);
1538         sca_dispose(kvm);
1539         KVM_EVENT(3, "creation of vm failed: %d", rc);
1540         return rc;
1541 }
1542
1543 bool kvm_arch_has_vcpu_debugfs(void)
1544 {
1545         return false;
1546 }
1547
1548 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1549 {
1550         return 0;
1551 }
1552
1553 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1554 {
1555         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1556         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1557         kvm_s390_clear_local_irqs(vcpu);
1558         kvm_clear_async_pf_completion_queue(vcpu);
1559         if (!kvm_is_ucontrol(vcpu->kvm))
1560                 sca_del_vcpu(vcpu);
1561
1562         if (kvm_is_ucontrol(vcpu->kvm))
1563                 gmap_remove(vcpu->arch.gmap);
1564
1565         if (vcpu->kvm->arch.use_cmma)
1566                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1567         free_page((unsigned long)(vcpu->arch.sie_block));
1568
1569         kvm_vcpu_uninit(vcpu);
1570         kmem_cache_free(kvm_vcpu_cache, vcpu);
1571 }
1572
1573 static void kvm_free_vcpus(struct kvm *kvm)
1574 {
1575         unsigned int i;
1576         struct kvm_vcpu *vcpu;
1577
1578         kvm_for_each_vcpu(i, vcpu, kvm)
1579                 kvm_arch_vcpu_destroy(vcpu);
1580
1581         mutex_lock(&kvm->lock);
1582         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1583                 kvm->vcpus[i] = NULL;
1584
1585         atomic_set(&kvm->online_vcpus, 0);
1586         mutex_unlock(&kvm->lock);
1587 }
1588
1589 void kvm_arch_destroy_vm(struct kvm *kvm)
1590 {
1591         kvm_free_vcpus(kvm);
1592         sca_dispose(kvm);
1593         debug_unregister(kvm->arch.dbf);
1594         free_page((unsigned long)kvm->arch.sie_page2);
1595         if (!kvm_is_ucontrol(kvm))
1596                 gmap_remove(kvm->arch.gmap);
1597         kvm_s390_destroy_adapters(kvm);
1598         kvm_s390_clear_float_irqs(kvm);
1599         kvm_s390_vsie_destroy(kvm);
1600         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1601 }
1602
1603 /* Section: vcpu related */
1604 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1605 {
1606         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1607         if (!vcpu->arch.gmap)
1608                 return -ENOMEM;
1609         vcpu->arch.gmap->private = vcpu->kvm;
1610
1611         return 0;
1612 }
1613
1614 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1615 {
1616         if (!kvm_s390_use_sca_entries())
1617                 return;
1618         read_lock(&vcpu->kvm->arch.sca_lock);
1619         if (vcpu->kvm->arch.use_esca) {
1620                 struct esca_block *sca = vcpu->kvm->arch.sca;
1621
1622                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1623                 sca->cpu[vcpu->vcpu_id].sda = 0;
1624         } else {
1625                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1626
1627                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1628                 sca->cpu[vcpu->vcpu_id].sda = 0;
1629         }
1630         read_unlock(&vcpu->kvm->arch.sca_lock);
1631 }
1632
1633 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1634 {
1635         if (!kvm_s390_use_sca_entries()) {
1636                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1637
1638                 /* we still need the basic sca for the ipte control */
1639                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1640                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1641         }
1642         read_lock(&vcpu->kvm->arch.sca_lock);
1643         if (vcpu->kvm->arch.use_esca) {
1644                 struct esca_block *sca = vcpu->kvm->arch.sca;
1645
1646                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1647                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1648                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1649                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1650                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1651         } else {
1652                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1653
1654                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1655                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1656                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1657                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1658         }
1659         read_unlock(&vcpu->kvm->arch.sca_lock);
1660 }
1661
1662 /* Basic SCA to Extended SCA data copy routines */
1663 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1664 {
1665         d->sda = s->sda;
1666         d->sigp_ctrl.c = s->sigp_ctrl.c;
1667         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1668 }
1669
1670 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1671 {
1672         int i;
1673
1674         d->ipte_control = s->ipte_control;
1675         d->mcn[0] = s->mcn;
1676         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1677                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1678 }
1679
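     /*
      * Convert a VM from the basic SCA (limited to KVM_S390_BSCA_CPU_SLOTS
      * VCPUs) to the extended SCA while all VCPUs are blocked, so that VCPU
      * ids beyond the basic limit become usable.
      */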
1680 static int sca_switch_to_extended(struct kvm *kvm)
1681 {
1682         struct bsca_block *old_sca = kvm->arch.sca;
1683         struct esca_block *new_sca;
1684         struct kvm_vcpu *vcpu;
1685         unsigned int vcpu_idx;
1686         u32 scaol, scaoh;
1687
1688         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1689         if (!new_sca)
1690                 return -ENOMEM;
1691
1692         scaoh = (u32)((u64)(new_sca) >> 32);
1693         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1694
1695         kvm_s390_vcpu_block_all(kvm);
1696         write_lock(&kvm->arch.sca_lock);
1697
1698         sca_copy_b_to_e(new_sca, old_sca);
1699
1700         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1701                 vcpu->arch.sie_block->scaoh = scaoh;
1702                 vcpu->arch.sie_block->scaol = scaol;
1703                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1704         }
1705         kvm->arch.sca = new_sca;
1706         kvm->arch.use_esca = 1;
1707
1708         write_unlock(&kvm->arch.sca_lock);
1709         kvm_s390_vcpu_unblock_all(kvm);
1710
1711         free_page((unsigned long)old_sca);
1712
1713         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1714                  old_sca, kvm->arch.sca);
1715         return 0;
1716 }
1717
1718 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1719 {
1720         int rc;
1721
1722         if (!kvm_s390_use_sca_entries()) {
1723                 if (id < KVM_MAX_VCPUS)
1724                         return true;
1725                 return false;
1726         }
1727         if (id < KVM_S390_BSCA_CPU_SLOTS)
1728                 return true;
1729         if (!sclp.has_esca || !sclp.has_64bscao)
1730                 return false;
1731
1732         mutex_lock(&kvm->lock);
1733         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1734         mutex_unlock(&kvm->lock);
1735
1736         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1737 }
1738
1739 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1740 {
1741         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1742         kvm_clear_async_pf_completion_queue(vcpu);
1743         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1744                                     KVM_SYNC_GPRS |
1745                                     KVM_SYNC_ACRS |
1746                                     KVM_SYNC_CRS |
1747                                     KVM_SYNC_ARCH0 |
1748                                     KVM_SYNC_PFAULT;
1749         kvm_s390_set_prefix(vcpu, 0);
1750         if (test_kvm_facility(vcpu->kvm, 64))
1751                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1752         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1753          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1754          */
1755         if (MACHINE_HAS_VX)
1756                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1757         else
1758                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1759
1760         if (kvm_is_ucontrol(vcpu->kvm))
1761                 return __kvm_ucontrol_vcpu_init(vcpu);
1762
1763         return 0;
1764 }
1765
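     /*
      * CPU timer accounting: while accounting is enabled (VCPU loaded and
      * not idle), elapsed TOD time is charged against the guest CPU timer
      * in the SIE block. cputm_start marks the start of the current period
      * and cputm_seqcount lets other threads read a consistent value.
      */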
1766 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1767 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1768 {
1769         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1770         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1771         vcpu->arch.cputm_start = get_tod_clock_fast();
1772         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1773 }
1774
1775 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1776 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1777 {
1778         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1779         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1780         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1781         vcpu->arch.cputm_start = 0;
1782         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1783 }
1784
1785 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1786 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1787 {
1788         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1789         vcpu->arch.cputm_enabled = true;
1790         __start_cpu_timer_accounting(vcpu);
1791 }
1792
1793 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1794 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1795 {
1796         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1797         __stop_cpu_timer_accounting(vcpu);
1798         vcpu->arch.cputm_enabled = false;
1799 }
1800
1801 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1802 {
1803         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1804         __enable_cpu_timer_accounting(vcpu);
1805         preempt_enable();
1806 }
1807
1808 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1809 {
1810         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1811         __disable_cpu_timer_accounting(vcpu);
1812         preempt_enable();
1813 }
1814
1815 /* set the cpu timer - may only be called from the VCPU thread itself */
1816 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1817 {
1818         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1819         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1820         if (vcpu->arch.cputm_enabled)
1821                 vcpu->arch.cputm_start = get_tod_clock_fast();
1822         vcpu->arch.sie_block->cputm = cputm;
1823         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1824         preempt_enable();
1825 }
1826
1827 /* update and get the cpu timer - can also be called from other VCPU threads */
1828 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1829 {
1830         unsigned int seq;
1831         __u64 value;
1832
1833         if (unlikely(!vcpu->arch.cputm_enabled))
1834                 return vcpu->arch.sie_block->cputm;
1835
1836         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1837         do {
1838                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1839                 /*
1840                  * If the writer would ever execute a read in the critical
1841                  * section, e.g. in irq context, we have a deadlock.
1842                  */
1843                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1844                 value = vcpu->arch.sie_block->cputm;
1845                 /* if cputm_start is 0, accounting is being started/stopped */
1846                 if (likely(vcpu->arch.cputm_start))
1847                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1848         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1849         preempt_enable();
1850         return value;
1851 }
1852
1853 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1854 {
1855
1856         gmap_enable(vcpu->arch.enabled_gmap);
1857         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1858         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1859                 __start_cpu_timer_accounting(vcpu);
1860         vcpu->cpu = cpu;
1861 }
1862
1863 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1864 {
1865         vcpu->cpu = -1;
1866         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1867                 __stop_cpu_timer_accounting(vcpu);
1868         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1869         vcpu->arch.enabled_gmap = gmap_get_enabled();
1870         gmap_disable(vcpu->arch.enabled_gmap);
1871
1872 }
1873
1874 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1875 {
1876         /* this equals initial cpu reset in POP, but we don't switch to ESA */
1877         vcpu->arch.sie_block->gpsw.mask = 0UL;
1878         vcpu->arch.sie_block->gpsw.addr = 0UL;
1879         kvm_s390_set_prefix(vcpu, 0);
1880         kvm_s390_set_cpu_timer(vcpu, 0);
1881         vcpu->arch.sie_block->ckc       = 0UL;
1882         vcpu->arch.sie_block->todpr     = 0;
1883         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1884         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1885         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1886         /* make sure the new fpc will be lazily loaded */
1887         save_fpu_regs();
1888         current->thread.fpu.fpc = 0;
1889         vcpu->arch.sie_block->gbea = 1;
1890         vcpu->arch.sie_block->pp = 0;
1891         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1892         kvm_clear_async_pf_completion_queue(vcpu);
1893         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1894                 kvm_s390_vcpu_stop(vcpu);
1895         kvm_s390_clear_local_irqs(vcpu);
1896 }
1897
1898 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1899 {
1900         mutex_lock(&vcpu->kvm->lock);
1901         preempt_disable();
1902         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1903         preempt_enable();
1904         mutex_unlock(&vcpu->kvm->lock);
1905         if (!kvm_is_ucontrol(vcpu->kvm)) {
1906                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1907                 sca_add_vcpu(vcpu);
1908         }
1909         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1910                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1911         /* make vcpu_load load the right gmap on the first trigger */
1912         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1913 }
1914
1915 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1916 {
1917         if (!test_kvm_facility(vcpu->kvm, 76))
1918                 return;
1919
1920         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1921
1922         if (vcpu->kvm->arch.crypto.aes_kw)
1923                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1924         if (vcpu->kvm->arch.crypto.dea_kw)
1925                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1926
1927         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1928 }
1929
1930 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1931 {
1932         free_page(vcpu->arch.sie_block->cbrlo);
1933         vcpu->arch.sie_block->cbrlo = 0;
1934 }
1935
1936 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1937 {
1938         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1939         if (!vcpu->arch.sie_block->cbrlo)
1940                 return -ENOMEM;
1941
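             /*
              * 0x80 is the CMMA interpretation control; 0x08 (PFMF
              * interpretation, cf. kvm_arch_vcpu_setup) is presumably
              * incompatible with it and therefore cleared here.
              */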
1942         vcpu->arch.sie_block->ecb2 |= 0x80;
1943         vcpu->arch.sie_block->ecb2 &= ~0x08;
1944         return 0;
1945 }
1946
1947 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1948 {
1949         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1950
1951         vcpu->arch.sie_block->ibc = model->ibc;
1952         if (test_kvm_facility(vcpu->kvm, 7))
1953                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1954 }
1955
1956 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1957 {
1958         int rc = 0;
1959
1960         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1961                                                     CPUSTAT_SM |
1962                                                     CPUSTAT_STOPPED);
1963
1964         if (test_kvm_facility(vcpu->kvm, 78))
1965                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1966         else if (test_kvm_facility(vcpu->kvm, 8))
1967                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1968
1969         kvm_s390_vcpu_setup_model(vcpu);
1970
1971         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1972         if (MACHINE_HAS_ESOP)
1973                 vcpu->arch.sie_block->ecb |= 0x02;
1974         if (test_kvm_facility(vcpu->kvm, 9))
1975                 vcpu->arch.sie_block->ecb |= 0x04;
1976         if (test_kvm_facility(vcpu->kvm, 73))
1977                 vcpu->arch.sie_block->ecb |= 0x10;
1978
1979         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1980                 vcpu->arch.sie_block->ecb2 |= 0x08;
1981         if (test_kvm_facility(vcpu->kvm, 130))
1982                 vcpu->arch.sie_block->ecb2 |= 0x20;
1983         vcpu->arch.sie_block->eca = 0x1002000U;
1984         if (sclp.has_cei)
1985                 vcpu->arch.sie_block->eca |= 0x80000000U;
1986         if (sclp.has_ib)
1987                 vcpu->arch.sie_block->eca |= 0x40000000U;
1988         if (sclp.has_siif)
1989                 vcpu->arch.sie_block->eca |= 1;
1990         if (sclp.has_sigpif)
1991                 vcpu->arch.sie_block->eca |= 0x10000000U;
1992         if (test_kvm_facility(vcpu->kvm, 129)) {
1993                 vcpu->arch.sie_block->eca |= 0x00020000;
1994                 vcpu->arch.sie_block->ecd |= 0x20000000;
1995         }
1996         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1997         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1998
1999         if (vcpu->kvm->arch.use_cmma) {
2000                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2001                 if (rc)
2002                         return rc;
2003         }
2004         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2005         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2006
2007         kvm_s390_vcpu_crypto_setup(vcpu);
2008
2009         return rc;
2010 }
2011
2012 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2013                                       unsigned int id)
2014 {
2015         struct kvm_vcpu *vcpu;
2016         struct sie_page *sie_page;
2017         int rc = -EINVAL;
2018
2019         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2020                 goto out;
2021
2022         rc = -ENOMEM;
2023
2024         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2025         if (!vcpu)
2026                 goto out;
2027
2028         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2029         if (!sie_page)
2030                 goto out_free_cpu;
2031
2032         vcpu->arch.sie_block = &sie_page->sie_block;
2033         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2034
2035         /* the real guest size will always be smaller than msl */
2036         vcpu->arch.sie_block->mso = 0;
2037         vcpu->arch.sie_block->msl = sclp.hamax;
2038
2039         vcpu->arch.sie_block->icpua = id;
2040         spin_lock_init(&vcpu->arch.local_int.lock);
2041         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2042         vcpu->arch.local_int.wq = &vcpu->wq;
2043         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2044         seqcount_init(&vcpu->arch.cputm_seqcount);
2045
2046         rc = kvm_vcpu_init(vcpu, kvm, id);
2047         if (rc)
2048                 goto out_free_sie_block;
2049         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2050                  vcpu->arch.sie_block);
2051         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2052
2053         return vcpu;
2054 out_free_sie_block:
2055         free_page((unsigned long)(vcpu->arch.sie_block));
2056 out_free_cpu:
2057         kmem_cache_free(kvm_vcpu_cache, vcpu);
2058 out:
2059         return ERR_PTR(rc);
2060 }
2061
2062 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2063 {
2064         return kvm_s390_vcpu_has_irq(vcpu, 0);
2065 }
2066
2067 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2068 {
2069         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2070         exit_sie(vcpu);
2071 }
2072
2073 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2074 {
2075         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2076 }
2077
2078 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2079 {
2080         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2081         exit_sie(vcpu);
2082 }
2083
2084 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2085 {
2086         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2087 }
2088
2089 /*
2090  * Kick a guest cpu out of SIE and wait until SIE is not running.
2091  * If the CPU is not running (e.g. waiting as idle) the function will
2092  * return immediately. */
2093 void exit_sie(struct kvm_vcpu *vcpu)
2094 {
2095         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2096         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2097                 cpu_relax();
2098 }
2099
2100 /* Kick a guest cpu out of SIE to process a request synchronously */
2101 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2102 {
2103         kvm_make_request(req, vcpu);
2104         kvm_s390_vcpu_request(vcpu);
2105 }
2106
2107 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2108                               unsigned long end)
2109 {
2110         struct kvm *kvm = gmap->private;
2111         struct kvm_vcpu *vcpu;
2112         unsigned long prefix;
2113         int i;
2114
2115         if (gmap_is_shadow(gmap))
2116                 return;
2117         if (start >= 1UL << 31)
2118                 /* We are only interested in prefix pages */
2119                 return;
2120         kvm_for_each_vcpu(i, vcpu, kvm) {
2121                 /* match against both prefix pages */
2122                 prefix = kvm_s390_get_prefix(vcpu);
2123                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2124                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2125                                    start, end);
2126                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2127                 }
2128         }
2129 }
2130
2131 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2132 {
2133         /* kvm common code refers to this, but never calls it */
2134         BUG();
2135         return 0;
2136 }
2137
2138 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2139                                            struct kvm_one_reg *reg)
2140 {
2141         int r = -EINVAL;
2142
2143         switch (reg->id) {
2144         case KVM_REG_S390_TODPR:
2145                 r = put_user(vcpu->arch.sie_block->todpr,
2146                              (u32 __user *)reg->addr);
2147                 break;
2148         case KVM_REG_S390_EPOCHDIFF:
2149                 r = put_user(vcpu->arch.sie_block->epoch,
2150                              (u64 __user *)reg->addr);
2151                 break;
2152         case KVM_REG_S390_CPU_TIMER:
2153                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2154                              (u64 __user *)reg->addr);
2155                 break;
2156         case KVM_REG_S390_CLOCK_COMP:
2157                 r = put_user(vcpu->arch.sie_block->ckc,
2158                              (u64 __user *)reg->addr);
2159                 break;
2160         case KVM_REG_S390_PFTOKEN:
2161                 r = put_user(vcpu->arch.pfault_token,
2162                              (u64 __user *)reg->addr);
2163                 break;
2164         case KVM_REG_S390_PFCOMPARE:
2165                 r = put_user(vcpu->arch.pfault_compare,
2166                              (u64 __user *)reg->addr);
2167                 break;
2168         case KVM_REG_S390_PFSELECT:
2169                 r = put_user(vcpu->arch.pfault_select,
2170                              (u64 __user *)reg->addr);
2171                 break;
2172         case KVM_REG_S390_PP:
2173                 r = put_user(vcpu->arch.sie_block->pp,
2174                              (u64 __user *)reg->addr);
2175                 break;
2176         case KVM_REG_S390_GBEA:
2177                 r = put_user(vcpu->arch.sie_block->gbea,
2178                              (u64 __user *)reg->addr);
2179                 break;
2180         default:
2181                 break;
2182         }
2183
2184         return r;
2185 }
2186
2187 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2188                                            struct kvm_one_reg *reg)
2189 {
2190         int r = -EINVAL;
2191         __u64 val;
2192
2193         switch (reg->id) {
2194         case KVM_REG_S390_TODPR:
2195                 r = get_user(vcpu->arch.sie_block->todpr,
2196                              (u32 __user *)reg->addr);
2197                 break;
2198         case KVM_REG_S390_EPOCHDIFF:
2199                 r = get_user(vcpu->arch.sie_block->epoch,
2200                              (u64 __user *)reg->addr);
2201                 break;
2202         case KVM_REG_S390_CPU_TIMER:
2203                 r = get_user(val, (u64 __user *)reg->addr);
2204                 if (!r)
2205                         kvm_s390_set_cpu_timer(vcpu, val);
2206                 break;
2207         case KVM_REG_S390_CLOCK_COMP:
2208                 r = get_user(vcpu->arch.sie_block->ckc,
2209                              (u64 __user *)reg->addr);
2210                 break;
2211         case KVM_REG_S390_PFTOKEN:
2212                 r = get_user(vcpu->arch.pfault_token,
2213                              (u64 __user *)reg->addr);
2214                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2215                         kvm_clear_async_pf_completion_queue(vcpu);
2216                 break;
2217         case KVM_REG_S390_PFCOMPARE:
2218                 r = get_user(vcpu->arch.pfault_compare,
2219                              (u64 __user *)reg->addr);
2220                 break;
2221         case KVM_REG_S390_PFSELECT:
2222                 r = get_user(vcpu->arch.pfault_select,
2223                              (u64 __user *)reg->addr);
2224                 break;
2225         case KVM_REG_S390_PP:
2226                 r = get_user(vcpu->arch.sie_block->pp,
2227                              (u64 __user *)reg->addr);
2228                 break;
2229         case KVM_REG_S390_GBEA:
2230                 r = get_user(vcpu->arch.sie_block->gbea,
2231                              (u64 __user *)reg->addr);
2232                 break;
2233         default:
2234                 break;
2235         }
2236
2237         return r;
2238 }
2239
2240 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2241 {
2242         kvm_s390_vcpu_initial_reset(vcpu);
2243         return 0;
2244 }
2245
2246 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2247 {
2248         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2249         return 0;
2250 }
2251
2252 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2253 {
2254         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2255         return 0;
2256 }
2257
2258 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2259                                   struct kvm_sregs *sregs)
2260 {
2261         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2262         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2263         return 0;
2264 }
2265
2266 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2267                                   struct kvm_sregs *sregs)
2268 {
2269         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2270         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2271         return 0;
2272 }
2273
2274 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2275 {
2276         if (test_fp_ctl(fpu->fpc))
2277                 return -EINVAL;
2278         vcpu->run->s.regs.fpc = fpu->fpc;
2279         if (MACHINE_HAS_VX)
2280                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2281                                  (freg_t *) fpu->fprs);
2282         else
2283                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2284         return 0;
2285 }
2286
2287 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2288 {
2289         /* make sure we have the latest values */
2290         save_fpu_regs();
2291         if (MACHINE_HAS_VX)
2292                 convert_vx_to_fp((freg_t *) fpu->fprs,
2293                                  (__vector128 *) vcpu->run->s.regs.vrs);
2294         else
2295                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2296         fpu->fpc = vcpu->run->s.regs.fpc;
2297         return 0;
2298 }
2299
2300 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2301 {
2302         int rc = 0;
2303
2304         if (!is_vcpu_stopped(vcpu))
2305                 rc = -EBUSY;
2306         else {
2307                 vcpu->run->psw_mask = psw.mask;
2308                 vcpu->run->psw_addr = psw.addr;
2309         }
2310         return rc;
2311 }
2312
2313 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2314                                   struct kvm_translation *tr)
2315 {
2316         return -EINVAL; /* not implemented yet */
2317 }
2318
2319 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2320                               KVM_GUESTDBG_USE_HW_BP | \
2321                               KVM_GUESTDBG_ENABLE)
2322
2323 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2324                                         struct kvm_guest_debug *dbg)
2325 {
2326         int rc = 0;
2327
2328         vcpu->guest_debug = 0;
2329         kvm_s390_clear_bp_data(vcpu);
2330
2331         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2332                 return -EINVAL;
2333         if (!sclp.has_gpere)
2334                 return -EINVAL;
2335
2336         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2337                 vcpu->guest_debug = dbg->control;
2338                 /* enforce guest PER */
2339                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2340
2341                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2342                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2343         } else {
2344                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2345                 vcpu->arch.guestdbg.last_bp = 0;
2346         }
2347
2348         if (rc) {
2349                 vcpu->guest_debug = 0;
2350                 kvm_s390_clear_bp_data(vcpu);
2351                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2352         }
2353
2354         return rc;
2355 }
2356
2357 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2358                                     struct kvm_mp_state *mp_state)
2359 {
2360         /* CHECK_STOP and LOAD are not supported yet */
2361         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2362                                        KVM_MP_STATE_OPERATING;
2363 }
2364
2365 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2366                                     struct kvm_mp_state *mp_state)
2367 {
2368         int rc = 0;
2369
2370         /* user space knows about this interface - let it control the state */
2371         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2372
2373         switch (mp_state->mp_state) {
2374         case KVM_MP_STATE_STOPPED:
2375                 kvm_s390_vcpu_stop(vcpu);
2376                 break;
2377         case KVM_MP_STATE_OPERATING:
2378                 kvm_s390_vcpu_start(vcpu);
2379                 break;
2380         case KVM_MP_STATE_LOAD:
2381         case KVM_MP_STATE_CHECK_STOP:
2382                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2383         default:
2384                 rc = -ENXIO;
2385         }
2386
2387         return rc;
2388 }
2389
2390 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2391 {
2392         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2393 }
2394
2395 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2396 {
2397 retry:
2398         kvm_s390_vcpu_request_handled(vcpu);
2399         if (!vcpu->requests)
2400                 return 0;
2401         /*
2402          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2403          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2404          * This ensures that the ipte instruction for this request has
2405          * already finished. We might race against a second unmapper that
2406          * wants to set the blocking bit. Let's just retry the request loop.
2407          */
2408         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2409                 int rc;
2410                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2411                                           kvm_s390_get_prefix(vcpu),
2412                                           PAGE_SIZE * 2, PROT_WRITE);
2413                 if (rc) {
2414                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2415                         return rc;
2416                 }
2417                 goto retry;
2418         }
2419
2420         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2421                 vcpu->arch.sie_block->ihcpu = 0xffff;
2422                 goto retry;
2423         }
2424
2425         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2426                 if (!ibs_enabled(vcpu)) {
2427                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2428                         atomic_or(CPUSTAT_IBS,
2429                                         &vcpu->arch.sie_block->cpuflags);
2430                 }
2431                 goto retry;
2432         }
2433
2434         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2435                 if (ibs_enabled(vcpu)) {
2436                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2437                         atomic_andnot(CPUSTAT_IBS,
2438                                           &vcpu->arch.sie_block->cpuflags);
2439                 }
2440                 goto retry;
2441         }
2442
2443         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2444                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2445                 goto retry;
2446         }
2447
2448         /* nothing to do, just clear the request */
2449         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2450
2451         return 0;
2452 }
2453
2454 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2455 {
2456         struct kvm_vcpu *vcpu;
2457         int i;
2458
2459         mutex_lock(&kvm->lock);
2460         preempt_disable();
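             /*
              * SIE adds the epoch to the host TOD to form the guest TOD,
              * so store the requested delta here.
              */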
2461         kvm->arch.epoch = tod - get_tod_clock();
2462         kvm_s390_vcpu_block_all(kvm);
2463         kvm_for_each_vcpu(i, vcpu, kvm)
2464                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2465         kvm_s390_vcpu_unblock_all(kvm);
2466         preempt_enable();
2467         mutex_unlock(&kvm->lock);
2468 }
2469
2470 /**
2471  * kvm_arch_fault_in_page - fault-in guest page if necessary
2472  * @vcpu: The corresponding virtual cpu
2473  * @gpa: Guest physical address
2474  * @writable: Whether the page should be writable or not
2475  *
2476  * Make sure that a guest page has been faulted-in on the host.
2477  *
2478  * Return: Zero on success, negative error code otherwise.
2479  */
2480 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2481 {
2482         return gmap_fault(vcpu->arch.gmap, gpa,
2483                           writable ? FAULT_FLAG_WRITE : 0);
2484 }
2485
2486 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2487                                       unsigned long token)
2488 {
2489         struct kvm_s390_interrupt inti;
2490         struct kvm_s390_irq irq;
2491
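             /* PFAULT_INIT is injected into the faulting VCPU, PFAULT_DONE as a
              * floating interrupt for the whole VM */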
2492         if (start_token) {
2493                 irq.u.ext.ext_params2 = token;
2494                 irq.type = KVM_S390_INT_PFAULT_INIT;
2495                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2496         } else {
2497                 inti.type = KVM_S390_INT_PFAULT_DONE;
2498                 inti.parm64 = token;
2499                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2500         }
2501 }
2502
2503 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2504                                      struct kvm_async_pf *work)
2505 {
2506         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2507         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2508 }
2509
2510 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2511                                  struct kvm_async_pf *work)
2512 {
2513         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2514         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2515 }
2516
2517 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2518                                struct kvm_async_pf *work)
2519 {
2520         /* s390 will always inject the page directly */
2521 }
2522
2523 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2524 {
2525         /*
2526          * s390 will always inject the page directly,
2527          * but we still want check_async_completion to clean up
2528          */
2529         return true;
2530 }
2531
2532 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2533 {
2534         hva_t hva;
2535         struct kvm_arch_async_pf arch;
2536         int rc;
2537
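             /*
              * Only use async page faults if the guest has set up the pfault
              * handshake and can currently take the corresponding external
              * interrupt; otherwise the fault is resolved synchronously.
              */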
2538         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2539                 return 0;
2540         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2541             vcpu->arch.pfault_compare)
2542                 return 0;
2543         if (psw_extint_disabled(vcpu))
2544                 return 0;
2545         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2546                 return 0;
2547         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2548                 return 0;
2549         if (!vcpu->arch.gmap->pfault_enabled)
2550                 return 0;
2551
2552         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2553         hva += current->thread.gmap_addr & ~PAGE_MASK;
2554         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2555                 return 0;
2556
2557         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2558         return rc;
2559 }
2560
2561 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2562 {
2563         int rc, cpuflags;
2564
2565         /*
2566          * On s390 notifications for arriving pages will be delivered directly
2567          * to the guest, but the housekeeping for completed pfaults is
2568          * handled outside the worker.
2569          */
2570         kvm_check_async_pf_completion(vcpu);
2571
2572         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2573         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2574
2575         if (need_resched())
2576                 schedule();
2577
2578         if (test_cpu_flag(CIF_MCCK_PENDING))
2579                 s390_handle_mcck();
2580
2581         if (!kvm_is_ucontrol(vcpu->kvm)) {
2582                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2583                 if (rc)
2584                         return rc;
2585         }
2586
2587         rc = kvm_s390_handle_requests(vcpu);
2588         if (rc)
2589                 return rc;
2590
2591         if (guestdbg_enabled(vcpu)) {
2592                 kvm_s390_backup_guest_per_regs(vcpu);
2593                 kvm_s390_patch_guest_per_regs(vcpu);
2594         }
2595
2596         vcpu->arch.sie_block->icptcode = 0;
2597         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2598         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2599         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2600
2601         return 0;
2602 }
2603
2604 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2605 {
2606         struct kvm_s390_pgm_info pgm_info = {
2607                 .code = PGM_ADDRESSING,
2608         };
2609         u8 opcode, ilen;
2610         int rc;
2611
2612         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2613         trace_kvm_s390_sie_fault(vcpu);
2614
2615         /*
2616          * We want to inject an addressing exception, which is defined as a
2617          * suppressing or terminating exception. However, since we came here
2618          * by a DAT access exception, the PSW still points to the faulting
2619          * instruction since DAT exceptions are nullifying. So we've got
2620          * to look up the current opcode to get the length of the instruction
2621          * to be able to forward the PSW.
2622          */
2623         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
2624         ilen = insn_length(opcode);
2625         if (rc < 0) {
2626                 return rc;
2627         } else if (rc) {
2628                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2629                  * Forward by arbitrary ilc, injection will take care of
2630                  * nullification if necessary.
2631                  */
2632                 pgm_info = vcpu->arch.pgm;
2633                 ilen = 4;
2634         }
2635         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2636         kvm_s390_forward_psw(vcpu, ilen);
2637         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2638 }
2639
2640 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2641 {
2642         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2643                    vcpu->arch.sie_block->icptcode);
2644         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2645
2646         if (guestdbg_enabled(vcpu))
2647                 kvm_s390_restore_guest_per_regs(vcpu);
2648
2649         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2650         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2651
2652         if (vcpu->arch.sie_block->icptcode > 0) {
2653                 int rc = kvm_handle_sie_intercept(vcpu);
2654
2655                 if (rc != -EOPNOTSUPP)
2656                         return rc;
2657                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2658                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2659                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2660                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2661                 return -EREMOTE;
2662         } else if (exit_reason != -EFAULT) {
2663                 vcpu->stat.exit_null++;
2664                 return 0;
2665         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2666                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2667                 vcpu->run->s390_ucontrol.trans_exc_code =
2668                                                 current->thread.gmap_addr;
2669                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2670                 return -EREMOTE;
2671         } else if (current->thread.gmap_pfault) {
2672                 trace_kvm_s390_major_guest_pfault(vcpu);
2673                 current->thread.gmap_pfault = 0;
2674                 if (kvm_arch_setup_async_pf(vcpu))
2675                         return 0;
2676                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2677         }
2678         return vcpu_post_run_fault_in_sie(vcpu);
2679 }
2680
2681 static int __vcpu_run(struct kvm_vcpu *vcpu)
2682 {
2683         int rc, exit_reason;
2684
2685         /*
2686          * We try to hold kvm->srcu during most of vcpu_run (except when
2687          * running the guest), so that memslots (and other stuff) are protected
2688          */
2689         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2690
2691         do {
2692                 rc = vcpu_pre_run(vcpu);
2693                 if (rc)
2694                         break;
2695
2696                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2697                 /*
2698                  * As PF_VCPU will be used in the fault handler, there must be no
2699                  * uaccess between guest_enter and guest_exit.
2700                  */
2701                 local_irq_disable();
2702                 guest_enter_irqoff();
2703                 __disable_cpu_timer_accounting(vcpu);
2704                 local_irq_enable();
2705                 exit_reason = sie64a(vcpu->arch.sie_block,
2706                                      vcpu->run->s.regs.gprs);
2707                 local_irq_disable();
2708                 __enable_cpu_timer_accounting(vcpu);
2709                 guest_exit_irqoff();
2710                 local_irq_enable();
2711                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2712
2713                 rc = vcpu_post_run(vcpu, exit_reason);
2714         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2715
2716         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2717         return rc;
2718 }
2719
2720 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2721 {
2722         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2723         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2724         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2725                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2726         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2727                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2728                 /* some control register changes require a tlb flush */
2729                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2730         }
2731         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2732                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2733                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2734                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2735                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2736                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2737         }
2738         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2739                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2740                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2741                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2742                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2743                         kvm_clear_async_pf_completion_queue(vcpu);
2744         }
2745         /*
2746          * If userspace sets the riccb (e.g. after migration) to a valid state,
2747          * we should enable RI here instead of doing the lazy enablement.
2748          */
2749         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2750             test_kvm_facility(vcpu->kvm, 64)) {
2751                 struct runtime_instr_cb *riccb =
2752                         (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2753
2754                 if (riccb->valid)
2755                         vcpu->arch.sie_block->ecb3 |= 0x01;
2756         }
2757         save_access_regs(vcpu->arch.host_acrs);
2758         restore_access_regs(vcpu->run->s.regs.acrs);
2759         /* save host (userspace) fprs/vrs */
2760         save_fpu_regs();
2761         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2762         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
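             /* from here on, the lazy FPU code operates on the guest fprs/vrs */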
2763         if (MACHINE_HAS_VX)
2764                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2765         else
2766                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2767         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2768         if (test_fp_ctl(current->thread.fpu.fpc))
2769                 /* User space provided an invalid FPC, let's clear it */
2770                 current->thread.fpu.fpc = 0;
2771
2772         kvm_run->kvm_dirty_regs = 0;
2773 }
2774
2775 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2776 {
2777         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2778         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2779         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2780         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2781         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2782         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2783         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2784         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2785         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2786         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2787         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2788         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2789         save_access_regs(vcpu->run->s.regs.acrs);
2790         restore_access_regs(vcpu->arch.host_acrs);
2791         /* Save guest register state */
2792         save_fpu_regs();
2793         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2794         /* Restore will be done lazily at return */
2795         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2796         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2797
2798 }
2799
2800 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2801 {
2802         int rc;
2803         sigset_t sigsaved;
2804
2805         if (kvm_run->immediate_exit)
2806                 return -EINTR;
2807
2808         if (guestdbg_exit_pending(vcpu)) {
2809                 kvm_s390_prepare_debug_exit(vcpu);
2810                 return 0;
2811         }
2812
2813         if (vcpu->sigset_active)
2814                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2815
2816         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2817                 kvm_s390_vcpu_start(vcpu);
2818         } else if (is_vcpu_stopped(vcpu)) {
2819                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2820                                    vcpu->vcpu_id);
2821                 return -EINVAL;
2822         }
2823
2824         sync_regs(vcpu, kvm_run);
2825         enable_cpu_timer_accounting(vcpu);
2826
2827         might_fault();
2828         rc = __vcpu_run(vcpu);
2829
2830         if (signal_pending(current) && !rc) {
2831                 kvm_run->exit_reason = KVM_EXIT_INTR;
2832                 rc = -EINTR;
2833         }
2834
2835         if (guestdbg_exit_pending(vcpu) && !rc)  {
2836                 kvm_s390_prepare_debug_exit(vcpu);
2837                 rc = 0;
2838         }
2839
2840         if (rc == -EREMOTE) {
2841                 /* userspace support is needed, kvm_run has been prepared */
2842                 rc = 0;
2843         }
2844
2845         disable_cpu_timer_accounting(vcpu);
2846         store_regs(vcpu, kvm_run);
2847
2848         if (vcpu->sigset_active)
2849                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2850
2851         vcpu->stat.exit_userspace++;
2852         return rc;
2853 }
2854
2855 /*
2856  * store status at address
2857  * we have two special cases:
2858  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2859  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2860  */
2861 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2862 {
2863         unsigned char archmode = 1;
2864         freg_t fprs[NUM_FPRS];
2865         unsigned int px;
2866         u64 clkcomp, cputm;
2867         int rc;
2868
2869         px = kvm_s390_get_prefix(vcpu);
2870         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2871                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2872                         return -EFAULT;
2873                 gpa = 0;
2874         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2875                 if (write_guest_real(vcpu, 163, &archmode, 1))
2876                         return -EFAULT;
2877                 gpa = px;
2878         } else
2879                 gpa -= __LC_FPREGS_SAVE_AREA;
2880
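             /* the stores below add the lowcore save-area offsets back to gpa */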
2881         /* manually convert vector registers if necessary */
2882         if (MACHINE_HAS_VX) {
2883                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2884                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2885                                      fprs, 128);
2886         } else {
2887                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2888                                      vcpu->run->s.regs.fprs, 128);
2889         }
2890         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2891                               vcpu->run->s.regs.gprs, 128);
2892         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2893                               &vcpu->arch.sie_block->gpsw, 16);
2894         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2895                               &px, 4);
2896         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2897                               &vcpu->run->s.regs.fpc, 4);
2898         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2899                               &vcpu->arch.sie_block->todpr, 4);
2900         cputm = kvm_s390_get_cpu_timer(vcpu);
2901         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2902                               &cputm, 8);
2903         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2904         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2905                               &clkcomp, 8);
2906         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2907                               &vcpu->run->s.regs.acrs, 64);
2908         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2909                               &vcpu->arch.sie_block->gcr, 128);
2910         return rc ? -EFAULT : 0;
2911 }
2912
2913 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2914 {
2915         /*
2916          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2917          * switch in the run ioctl. Let's update our copies before we save
2918          * them into the save area
2919          */
2920         save_fpu_regs();
2921         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2922         save_access_regs(vcpu->run->s.regs.acrs);
2923
2924         return kvm_s390_store_status_unloaded(vcpu, addr);
2925 }
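/*
 * A userspace sketch, assuming the KVM_S390_STORE_STATUS_* constants are
 * visible to the caller; the target address is passed directly as the
 * ioctl argument and the two special values take the paths above:
 *
 *      ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 *      ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_PREFIXED);
 *      ioctl(vcpu_fd, KVM_S390_STORE_STATUS, save_area_addr);
 */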
2926
2927 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2928 {
2929         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2930         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2931 }
2932
2933 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2934 {
2935         unsigned int i;
2936         struct kvm_vcpu *vcpu;
2937
2938         kvm_for_each_vcpu(i, vcpu, kvm) {
2939                 __disable_ibs_on_vcpu(vcpu);
2940         }
2941 }
2942
2943 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2944 {
2945         if (!sclp.has_ibs)
2946                 return;
2947         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2948         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2949 }
2950
2951 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2952 {
2953         int i, online_vcpus, started_vcpus = 0;
2954
2955         if (!is_vcpu_stopped(vcpu))
2956                 return;
2957
2958         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2959         /* Only one cpu at a time may enter/leave the STOPPED state. */
2960         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2961         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2962
2963         for (i = 0; i < online_vcpus; i++) {
2964                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2965                         started_vcpus++;
2966         }
2967
2968         if (started_vcpus == 0) {
2969                 /* we're the only active VCPU -> speed it up */
2970                 __enable_ibs_on_vcpu(vcpu);
2971         } else if (started_vcpus == 1) {
2972                 /*
2973                  * As we are starting a second VCPU, we have to disable
2974                  * the IBS facility on all VCPUs to remove potentially
2975                  * outstanding ENABLE requests.
2976                  */
2977                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2978         }
2979
2980         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2981         /*
2982          * Another VCPU might have used IBS while we were offline.
2983          * Let's play safe and flush the VCPU at startup.
2984          */
2985         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2986         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2987         return;
2988 }
2989
2990 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2991 {
2992         int i, online_vcpus, started_vcpus = 0;
2993         struct kvm_vcpu *started_vcpu = NULL;
2994
2995         if (is_vcpu_stopped(vcpu))
2996                 return;
2997
2998         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2999         /* Only one cpu at a time may enter/leave the STOPPED state. */
3000         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3001         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3002
3003         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3004         kvm_s390_clear_stop_irq(vcpu);
3005
3006         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3007         __disable_ibs_on_vcpu(vcpu);
3008
3009         for (i = 0; i < online_vcpus; i++) {
3010                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3011                         started_vcpus++;
3012                         started_vcpu = vcpu->kvm->vcpus[i];
3013                 }
3014         }
3015
3016         if (started_vcpus == 1) {
3017                 /*
3018                  * As we only have one VCPU left, we want to enable the
3019                  * IBS facility for that VCPU to speed it up.
3020                  */
3021                 __enable_ibs_on_vcpu(started_vcpu);
3022         }
3023
3024         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3025         return;
3026 }
3027
3028 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3029                                      struct kvm_enable_cap *cap)
3030 {
3031         int r;
3032
3033         if (cap->flags)
3034                 return -EINVAL;
3035
3036         switch (cap->cap) {
3037         case KVM_CAP_S390_CSS_SUPPORT:
3038                 if (!vcpu->kvm->arch.css_support) {
3039                         vcpu->kvm->arch.css_support = 1;
3040                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3041                         trace_kvm_s390_enable_css(vcpu->kvm);
3042                 }
3043                 r = 0;
3044                 break;
3045         default:
3046                 r = -EINVAL;
3047                 break;
3048         }
3049         return r;
3050 }
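/*
 * A userspace sketch for the single capability handled here, assuming
 * vcpu_fd from KVM_CREATE_VCPU:
 *
 *      struct kvm_enable_cap cap = {
 *              .cap = KVM_CAP_S390_CSS_SUPPORT,
 *      };
 *      if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
 *              perror("KVM_ENABLE_CAP");
 */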
3051
3052 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3053                                   struct kvm_s390_mem_op *mop)
3054 {
3055         void __user *uaddr = (void __user *)mop->buf;
3056         void *tmpbuf = NULL;
3057         int r, srcu_idx;
3058         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3059                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3060
3061         if (mop->flags & ~supported_flags)
3062                 return -EINVAL;
3063
3064         if (mop->size > MEM_OP_MAX_SIZE)
3065                 return -E2BIG;
3066
3067         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3068                 tmpbuf = vmalloc(mop->size);
3069                 if (!tmpbuf)
3070                         return -ENOMEM;
3071         }
3072
3073         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3074
3075         switch (mop->op) {
3076         case KVM_S390_MEMOP_LOGICAL_READ:
3077                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3078                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3079                                             mop->size, GACC_FETCH);
3080                         break;
3081                 }
3082                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3083                 if (r == 0) {
3084                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3085                                 r = -EFAULT;
3086                 }
3087                 break;
3088         case KVM_S390_MEMOP_LOGICAL_WRITE:
3089                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3090                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3091                                             mop->size, GACC_STORE);
3092                         break;
3093                 }
3094                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3095                         r = -EFAULT;
3096                         break;
3097                 }
3098                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3099                 break;
3100         default:
3101                 r = -EINVAL;
3102         }
3103
3104         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3105
3106         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3107                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3108
3109         vfree(tmpbuf);
3110         return r;
3111 }
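/*
 * A userspace sketch of the memop interface implemented above, reading
 * len bytes from a guest logical address into buf (guest_addr, len, buf
 * and vcpu_fd are assumptions; error handling omitted):
 *
 *      struct kvm_s390_mem_op op = {
 *              .gaddr = guest_addr,
 *              .size  = len,
 *              .op    = KVM_S390_MEMOP_LOGICAL_READ,
 *              .buf   = (__u64)(unsigned long)buf,
 *              .ar    = 0,
 *      };
 *      ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */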
3112
3113 long kvm_arch_vcpu_ioctl(struct file *filp,
3114                          unsigned int ioctl, unsigned long arg)
3115 {
3116         struct kvm_vcpu *vcpu = filp->private_data;
3117         void __user *argp = (void __user *)arg;
3118         int idx;
3119         long r;
3120
3121         switch (ioctl) {
3122         case KVM_S390_IRQ: {
3123                 struct kvm_s390_irq s390irq;
3124
3125                 r = -EFAULT;
3126                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3127                         break;
3128                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3129                 break;
3130         }
3131         case KVM_S390_INTERRUPT: {
3132                 struct kvm_s390_interrupt s390int;
3133                 struct kvm_s390_irq s390irq;
3134
3135                 r = -EFAULT;
3136                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3137                         break;
3138                 if (s390int_to_s390irq(&s390int, &s390irq))
3139                         return -EINVAL;
3140                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3141                 break;
3142         }
3143         case KVM_S390_STORE_STATUS:
3144                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3145                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3146                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3147                 break;
3148         case KVM_S390_SET_INITIAL_PSW: {
3149                 psw_t psw;
3150
3151                 r = -EFAULT;
3152                 if (copy_from_user(&psw, argp, sizeof(psw)))
3153                         break;
3154                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3155                 break;
3156         }
3157         case KVM_S390_INITIAL_RESET:
3158                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3159                 break;
3160         case KVM_SET_ONE_REG:
3161         case KVM_GET_ONE_REG: {
3162                 struct kvm_one_reg reg;
3163                 r = -EFAULT;
3164                 if (copy_from_user(&reg, argp, sizeof(reg)))
3165                         break;
3166                 if (ioctl == KVM_SET_ONE_REG)
3167                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3168                 else
3169                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3170                 break;
3171         }
3172 #ifdef CONFIG_KVM_S390_UCONTROL
3173         case KVM_S390_UCAS_MAP: {
3174                 struct kvm_s390_ucas_mapping ucasmap;
3175
3176                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3177                         r = -EFAULT;
3178                         break;
3179                 }
3180
3181                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3182                         r = -EINVAL;
3183                         break;
3184                 }
3185
3186                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3187                                      ucasmap.vcpu_addr, ucasmap.length);
3188                 break;
3189         }
3190         case KVM_S390_UCAS_UNMAP: {
3191                 struct kvm_s390_ucas_mapping ucasmap;
3192
3193                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3194                         r = -EFAULT;
3195                         break;
3196                 }
3197
3198                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3199                         r = -EINVAL;
3200                         break;
3201                 }
3202
3203                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3204                         ucasmap.length);
3205                 break;
3206         }
3207 #endif
3208         case KVM_S390_VCPU_FAULT: {
3209                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3210                 break;
3211         }
3212         case KVM_ENABLE_CAP:
3213         {
3214                 struct kvm_enable_cap cap;
3215                 r = -EFAULT;
3216                 if (copy_from_user(&cap, argp, sizeof(cap)))
3217                         break;
3218                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3219                 break;
3220         }
3221         case KVM_S390_MEM_OP: {
3222                 struct kvm_s390_mem_op mem_op;
3223
3224                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3225                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3226                 else
3227                         r = -EFAULT;
3228                 break;
3229         }
3230         case KVM_S390_SET_IRQ_STATE: {
3231                 struct kvm_s390_irq_state irq_state;
3232
3233                 r = -EFAULT;
3234                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3235                         break;
3236                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3237                     irq_state.len == 0 ||
3238                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3239                         r = -EINVAL;
3240                         break;
3241                 }
3242                 r = kvm_s390_set_irq_state(vcpu,
3243                                            (void __user *) irq_state.buf,
3244                                            irq_state.len);
3245                 break;
3246         }
3247         case KVM_S390_GET_IRQ_STATE: {
3248                 struct kvm_s390_irq_state irq_state;
3249
3250                 r = -EFAULT;
3251                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3252                         break;
3253                 if (irq_state.len == 0) {
3254                         r = -EINVAL;
3255                         break;
3256                 }
3257                 r = kvm_s390_get_irq_state(vcpu,
3258                                            (__u8 __user *)  irq_state.buf,
3259                                            irq_state.len);
3260                 break;
3261         }
3262         default:
3263                 r = -ENOTTY;
3264         }
3265         return r;
3266 }
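/*
 * A userspace sketch for the irq-state pair handled above; MAX_IRQS is a
 * hypothetical constant sized to stay within the VCPU_IRQS_MAX_BUF bound
 * the handler enforces:
 *
 *      struct kvm_s390_irq irqs[MAX_IRQS];
 *      struct kvm_s390_irq_state state = {
 *              .buf = (__u64)(unsigned long)irqs,
 *              .len = sizeof(irqs),
 *      };
 *      ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &state);
 *      ...
 *      ioctl(vcpu_fd, KVM_S390_SET_IRQ_STATE, &state);
 */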
3267
3268 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3269 {
3270 #ifdef CONFIG_KVM_S390_UCONTROL
3271         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3272                  && (kvm_is_ucontrol(vcpu->kvm))) {
3273                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3274                 get_page(vmf->page);
3275                 return 0;
3276         }
3277 #endif
3278         return VM_FAULT_SIGBUS;
3279 }
3280
3281 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3282                             unsigned long npages)
3283 {
3284         return 0;
3285 }
3286
3287 /* Section: memory related */
3288 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3289                                    struct kvm_memory_slot *memslot,
3290                                    const struct kvm_userspace_memory_region *mem,
3291                                    enum kvm_mr_change change)
3292 {
3293         /* A few sanity checks. Memory slots have to start and end at a
3294            segment boundary (1MB). The memory in userland may be fragmented
3295            into various different vmas. It is okay to mmap() and munmap()
3296            memory in this slot at any time after this call. */
3297
3298         if (mem->userspace_addr & 0xffffful)
3299                 return -EINVAL;
3300
3301         if (mem->memory_size & 0xffffful)
3302                 return -EINVAL;
3303
3304         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3305                 return -EINVAL;
3306
3307         return 0;
3308 }
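/*
 * A userspace sketch of a memslot that satisfies the checks above; both
 * the size and the host address are multiples of the 1MB segment size
 * (vm_fd and host_addr are assumptions, host_addr already 1MB aligned):
 *
 *      struct kvm_userspace_memory_region region = {
 *              .slot            = 0,
 *              .guest_phys_addr = 0,
 *              .memory_size     = 256UL << 20,
 *              .userspace_addr  = (__u64)(unsigned long)host_addr,
 *      };
 *      ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */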
3309
3310 void kvm_arch_commit_memory_region(struct kvm *kvm,
3311                                 const struct kvm_userspace_memory_region *mem,
3312                                 const struct kvm_memory_slot *old,
3313                                 const struct kvm_memory_slot *new,
3314                                 enum kvm_mr_change change)
3315 {
3316         int rc;
3317
3318         /* If the basics of the memslot do not change, we do not want
3319          * to update the gmap. Every update causes several unnecessary
3320          * segment translation exceptions. This is usually handled just
3321          * fine by the normal fault handler + gmap, but it will also
3322          * cause faults on the prefix page of running guest CPUs.
3323          */
3324         if (old->userspace_addr == mem->userspace_addr &&
3325             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3326             old->npages * PAGE_SIZE == mem->memory_size)
3327                 return;
3328
3329         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3330                 mem->guest_phys_addr, mem->memory_size);
3331         if (rc)
3332                 pr_warn("failed to commit memory region\n");
3333         return;
3334 }
3335
3336 static inline unsigned long nonhyp_mask(int i)
3337 {
3338         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3339
3340         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3341 }
3342
3343 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3344 {
3345         vcpu->valid_wakeup = false;
3346 }
3347
3348 static int __init kvm_s390_init(void)
3349 {
3350         int i;
3351
3352         if (!sclp.has_sief2) {
3353                 pr_info("SIE not available\n");
3354                 return -ENODEV;
3355         }
3356
3357         for (i = 0; i < 16; i++)
3358                 kvm_s390_fac_list_mask[i] |=
3359                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3360
3361         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3362 }
3363
3364 static void __exit kvm_s390_exit(void)
3365 {
3366         kvm_exit();
3367 }
3368
3369 module_init(kvm_s390_init);
3370 module_exit(kvm_s390_exit);
3371
3372 /*
3373  * Enable autoloading of the kvm module.
3374  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3375  * since x86 takes a different approach.
3376  */
3377 #include <linux/miscdevice.h>
3378 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3379 MODULE_ALIAS("devname:kvm");