]> git.karo-electronics.de Git - karo-tx-linux.git/blob - arch/s390/kvm/kvm-s390.c
KVM: race-free exit from KVM_RUN without POSIX signals
[karo-tx-linux.git] / arch / s390 / kvm / kvm-s390.c
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <asm/asm-offsets.h>
32 #include <asm/lowcore.h>
33 #include <asm/stp.h>
34 #include <asm/pgtable.h>
35 #include <asm/gmap.h>
36 #include <asm/nmi.h>
37 #include <asm/switch_to.h>
38 #include <asm/isc.h>
39 #include <asm/sclp.h>
40 #include <asm/cpacf.h>
41 #include <asm/timex.h>
42 #include "kvm-s390.h"
43 #include "gaccess.h"
44
45 #define KMSG_COMPONENT "kvm-s390"
46 #undef pr_fmt
47 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
48
49 #define CREATE_TRACE_POINTS
50 #include "trace.h"
51 #include "trace-s390.h"
52
53 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
54 #define LOCAL_IRQS 32
55 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
56                            (KVM_MAX_VCPUS + LOCAL_IRQS))
57
58 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
59
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61         { "userspace_handled", VCPU_STAT(exit_userspace) },
62         { "exit_null", VCPU_STAT(exit_null) },
63         { "exit_validity", VCPU_STAT(exit_validity) },
64         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
65         { "exit_external_request", VCPU_STAT(exit_external_request) },
66         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
67         { "exit_instruction", VCPU_STAT(exit_instruction) },
68         { "exit_pei", VCPU_STAT(exit_pei) },
69         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
70         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
71         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
72         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
73         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
74         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
75         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
76         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
77         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
78         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
79         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
80         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
81         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
82         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
83         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
84         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
85         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
86         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
87         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
88         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
89         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
90         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
91         { "instruction_spx", VCPU_STAT(instruction_spx) },
92         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
93         { "instruction_stap", VCPU_STAT(instruction_stap) },
94         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
95         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
96         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
97         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
98         { "instruction_essa", VCPU_STAT(instruction_essa) },
99         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
100         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
101         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
102         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
103         { "instruction_sie", VCPU_STAT(instruction_sie) },
104         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
105         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
106         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
107         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
108         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
109         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
110         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
111         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
112         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
113         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
114         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
115         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
116         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
117         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
118         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
119         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
120         { "diagnose_10", VCPU_STAT(diagnose_10) },
121         { "diagnose_44", VCPU_STAT(diagnose_44) },
122         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
123         { "diagnose_258", VCPU_STAT(diagnose_258) },
124         { "diagnose_308", VCPU_STAT(diagnose_308) },
125         { "diagnose_500", VCPU_STAT(diagnose_500) },
126         { NULL }
127 };
128
129 /* allow nested virtualization in KVM (if enabled by user space) */
130 static int nested;
131 module_param(nested, int, S_IRUGO);
132 MODULE_PARM_DESC(nested, "Nested virtualization support");
133
134 /* upper facilities limit for kvm */
135 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
136
137 unsigned long kvm_s390_fac_list_mask_size(void)
138 {
139         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
140         return ARRAY_SIZE(kvm_s390_fac_list_mask);
141 }
142
143 /* available cpu features supported by kvm */
144 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
145 /* available subfunctions indicated via query / "test bit" */
146 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
147
148 static struct gmap_notifier gmap_notifier;
149 static struct gmap_notifier vsie_gmap_notifier;
150 debug_info_t *kvm_s390_dbf;
151
152 /* Section: not file related */
153 int kvm_arch_hardware_enable(void)
154 {
155         /* every s390 is virtualization enabled ;-) */
156         return 0;
157 }
158
159 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
160                               unsigned long end);
161
162 /*
163  * This callback is executed during stop_machine(). All CPUs are therefore
164  * temporarily stopped. In order not to change guest behavior, we have to
165  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
166  * so a CPU won't be stopped while calculating with the epoch.
167  */
168 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
169                           void *v)
170 {
171         struct kvm *kvm;
172         struct kvm_vcpu *vcpu;
173         int i;
174         unsigned long long *delta = v;
175
176         list_for_each_entry(kvm, &vm_list, vm_list) {
177                 kvm->arch.epoch -= *delta;
178                 kvm_for_each_vcpu(i, vcpu, kvm) {
179                         vcpu->arch.sie_block->epoch -= *delta;
180                         if (vcpu->arch.cputm_enabled)
181                                 vcpu->arch.cputm_start += *delta;
182                         if (vcpu->arch.vsie_block)
183                                 vcpu->arch.vsie_block->epoch -= *delta;
184                 }
185         }
186         return NOTIFY_OK;
187 }
188
189 static struct notifier_block kvm_clock_notifier = {
190         .notifier_call = kvm_clock_sync,
191 };
192
193 int kvm_arch_hardware_setup(void)
194 {
195         gmap_notifier.notifier_call = kvm_gmap_notifier;
196         gmap_register_pte_notifier(&gmap_notifier);
197         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
198         gmap_register_pte_notifier(&vsie_gmap_notifier);
199         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
200                                        &kvm_clock_notifier);
201         return 0;
202 }
203
204 void kvm_arch_hardware_unsetup(void)
205 {
206         gmap_unregister_pte_notifier(&gmap_notifier);
207         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
208         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
209                                          &kvm_clock_notifier);
210 }
211
212 static void allow_cpu_feat(unsigned long nr)
213 {
214         set_bit_inv(nr, kvm_s390_available_cpu_feat);
215 }
216
217 static inline int plo_test_bit(unsigned char nr)
218 {
219         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
220         int cc;
221
222         asm volatile(
223                 /* Parameter registers are ignored for "test bit" */
224                 "       plo     0,0,0,0(0)\n"
225                 "       ipm     %0\n"
226                 "       srl     %0,28\n"
227                 : "=d" (cc)
228                 : "d" (r0)
229                 : "cc");
230         return cc == 0;
231 }
232
233 static void kvm_s390_cpu_feat_init(void)
234 {
235         int i;
236
237         for (i = 0; i < 256; ++i) {
238                 if (plo_test_bit(i))
239                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
240         }
241
242         if (test_facility(28)) /* TOD-clock steering */
243                 ptff(kvm_s390_available_subfunc.ptff,
244                      sizeof(kvm_s390_available_subfunc.ptff),
245                      PTFF_QAF);
246
247         if (test_facility(17)) { /* MSA */
248                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
249                               kvm_s390_available_subfunc.kmac);
250                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
251                               kvm_s390_available_subfunc.kmc);
252                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
253                               kvm_s390_available_subfunc.km);
254                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
255                               kvm_s390_available_subfunc.kimd);
256                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
257                               kvm_s390_available_subfunc.klmd);
258         }
259         if (test_facility(76)) /* MSA3 */
260                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
261                               kvm_s390_available_subfunc.pckmo);
262         if (test_facility(77)) { /* MSA4 */
263                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
264                               kvm_s390_available_subfunc.kmctr);
265                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
266                               kvm_s390_available_subfunc.kmf);
267                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
268                               kvm_s390_available_subfunc.kmo);
269                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
270                               kvm_s390_available_subfunc.pcc);
271         }
272         if (test_facility(57)) /* MSA5 */
273                 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
274                               kvm_s390_available_subfunc.ppno);
275
276         if (MACHINE_HAS_ESOP)
277                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
278         /*
279          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
280          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
281          */
282         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
283             !test_facility(3) || !nested)
284                 return;
285         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
286         if (sclp.has_64bscao)
287                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
288         if (sclp.has_siif)
289                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
290         if (sclp.has_gpere)
291                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
292         if (sclp.has_gsls)
293                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
294         if (sclp.has_ib)
295                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
296         if (sclp.has_cei)
297                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
298         if (sclp.has_ibs)
299                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
300         /*
301          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
302          * all skey handling functions read/set the skey from the PGSTE
303          * instead of the real storage key.
304          *
305          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
306          * pages being detected as preserved although they are resident.
307          *
308          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
309          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
310          *
311          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
312          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
313          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
314          *
315          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
316          * cannot easily shadow the SCA because of the ipte lock.
317          */
318 }
319
320 int kvm_arch_init(void *opaque)
321 {
322         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
323         if (!kvm_s390_dbf)
324                 return -ENOMEM;
325
326         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
327                 debug_unregister(kvm_s390_dbf);
328                 return -ENOMEM;
329         }
330
331         kvm_s390_cpu_feat_init();
332
333         /* Register floating interrupt controller interface. */
334         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
335 }
336
337 void kvm_arch_exit(void)
338 {
339         debug_unregister(kvm_s390_dbf);
340 }
341
342 /* Section: device related */
343 long kvm_arch_dev_ioctl(struct file *filp,
344                         unsigned int ioctl, unsigned long arg)
345 {
346         if (ioctl == KVM_S390_ENABLE_SIE)
347                 return s390_enable_sie();
348         return -EINVAL;
349 }
350
351 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
352 {
353         int r;
354
355         switch (ext) {
356         case KVM_CAP_S390_PSW:
357         case KVM_CAP_S390_GMAP:
358         case KVM_CAP_SYNC_MMU:
359 #ifdef CONFIG_KVM_S390_UCONTROL
360         case KVM_CAP_S390_UCONTROL:
361 #endif
362         case KVM_CAP_ASYNC_PF:
363         case KVM_CAP_SYNC_REGS:
364         case KVM_CAP_ONE_REG:
365         case KVM_CAP_ENABLE_CAP:
366         case KVM_CAP_S390_CSS_SUPPORT:
367         case KVM_CAP_IOEVENTFD:
368         case KVM_CAP_DEVICE_CTRL:
369         case KVM_CAP_ENABLE_CAP_VM:
370         case KVM_CAP_S390_IRQCHIP:
371         case KVM_CAP_VM_ATTRIBUTES:
372         case KVM_CAP_MP_STATE:
373         case KVM_CAP_IMMEDIATE_EXIT:
374         case KVM_CAP_S390_INJECT_IRQ:
375         case KVM_CAP_S390_USER_SIGP:
376         case KVM_CAP_S390_USER_STSI:
377         case KVM_CAP_S390_SKEYS:
378         case KVM_CAP_S390_IRQ_STATE:
379         case KVM_CAP_S390_USER_INSTR0:
380                 r = 1;
381                 break;
382         case KVM_CAP_S390_MEM_OP:
383                 r = MEM_OP_MAX_SIZE;
384                 break;
385         case KVM_CAP_NR_VCPUS:
386         case KVM_CAP_MAX_VCPUS:
387                 r = KVM_S390_BSCA_CPU_SLOTS;
388                 if (!kvm_s390_use_sca_entries())
389                         r = KVM_MAX_VCPUS;
390                 else if (sclp.has_esca && sclp.has_64bscao)
391                         r = KVM_S390_ESCA_CPU_SLOTS;
392                 break;
393         case KVM_CAP_NR_MEMSLOTS:
394                 r = KVM_USER_MEM_SLOTS;
395                 break;
396         case KVM_CAP_S390_COW:
397                 r = MACHINE_HAS_ESOP;
398                 break;
399         case KVM_CAP_S390_VECTOR_REGISTERS:
400                 r = MACHINE_HAS_VX;
401                 break;
402         case KVM_CAP_S390_RI:
403                 r = test_facility(64);
404                 break;
405         default:
406                 r = 0;
407         }
408         return r;
409 }
410
411 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
412                                         struct kvm_memory_slot *memslot)
413 {
414         gfn_t cur_gfn, last_gfn;
415         unsigned long address;
416         struct gmap *gmap = kvm->arch.gmap;
417
418         /* Loop over all guest pages */
419         last_gfn = memslot->base_gfn + memslot->npages;
420         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
421                 address = gfn_to_hva_memslot(memslot, cur_gfn);
422
423                 if (test_and_clear_guest_dirty(gmap->mm, address))
424                         mark_page_dirty(kvm, cur_gfn);
425                 if (fatal_signal_pending(current))
426                         return;
427                 cond_resched();
428         }
429 }
430
431 /* Section: vm related */
432 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
433
434 /*
435  * Get (and clear) the dirty memory log for a memory slot.
436  */
437 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
438                                struct kvm_dirty_log *log)
439 {
440         int r;
441         unsigned long n;
442         struct kvm_memslots *slots;
443         struct kvm_memory_slot *memslot;
444         int is_dirty = 0;
445
446         if (kvm_is_ucontrol(kvm))
447                 return -EINVAL;
448
449         mutex_lock(&kvm->slots_lock);
450
451         r = -EINVAL;
452         if (log->slot >= KVM_USER_MEM_SLOTS)
453                 goto out;
454
455         slots = kvm_memslots(kvm);
456         memslot = id_to_memslot(slots, log->slot);
457         r = -ENOENT;
458         if (!memslot->dirty_bitmap)
459                 goto out;
460
461         kvm_s390_sync_dirty_log(kvm, memslot);
462         r = kvm_get_dirty_log(kvm, log, &is_dirty);
463         if (r)
464                 goto out;
465
466         /* Clear the dirty log */
467         if (is_dirty) {
468                 n = kvm_dirty_bitmap_bytes(memslot);
469                 memset(memslot->dirty_bitmap, 0, n);
470         }
471         r = 0;
472 out:
473         mutex_unlock(&kvm->slots_lock);
474         return r;
475 }
476
477 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
478 {
479         unsigned int i;
480         struct kvm_vcpu *vcpu;
481
482         kvm_for_each_vcpu(i, vcpu, kvm) {
483                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
484         }
485 }
486
487 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
488 {
489         int r;
490
491         if (cap->flags)
492                 return -EINVAL;
493
494         switch (cap->cap) {
495         case KVM_CAP_S390_IRQCHIP:
496                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
497                 kvm->arch.use_irqchip = 1;
498                 r = 0;
499                 break;
500         case KVM_CAP_S390_USER_SIGP:
501                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
502                 kvm->arch.user_sigp = 1;
503                 r = 0;
504                 break;
505         case KVM_CAP_S390_VECTOR_REGISTERS:
506                 mutex_lock(&kvm->lock);
507                 if (kvm->created_vcpus) {
508                         r = -EBUSY;
509                 } else if (MACHINE_HAS_VX) {
510                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
511                         set_kvm_facility(kvm->arch.model.fac_list, 129);
512                         if (test_facility(134)) {
513                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
514                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
515                         }
516                         if (test_facility(135)) {
517                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
518                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
519                         }
520                         r = 0;
521                 } else
522                         r = -EINVAL;
523                 mutex_unlock(&kvm->lock);
524                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
525                          r ? "(not available)" : "(success)");
526                 break;
527         case KVM_CAP_S390_RI:
528                 r = -EINVAL;
529                 mutex_lock(&kvm->lock);
530                 if (kvm->created_vcpus) {
531                         r = -EBUSY;
532                 } else if (test_facility(64)) {
533                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
534                         set_kvm_facility(kvm->arch.model.fac_list, 64);
535                         r = 0;
536                 }
537                 mutex_unlock(&kvm->lock);
538                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
539                          r ? "(not available)" : "(success)");
540                 break;
541         case KVM_CAP_S390_USER_STSI:
542                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
543                 kvm->arch.user_stsi = 1;
544                 r = 0;
545                 break;
546         case KVM_CAP_S390_USER_INSTR0:
547                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
548                 kvm->arch.user_instr0 = 1;
549                 icpt_operexc_on_all_vcpus(kvm);
550                 r = 0;
551                 break;
552         default:
553                 r = -EINVAL;
554                 break;
555         }
556         return r;
557 }
558
559 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
560 {
561         int ret;
562
563         switch (attr->attr) {
564         case KVM_S390_VM_MEM_LIMIT_SIZE:
565                 ret = 0;
566                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
567                          kvm->arch.mem_limit);
568                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
569                         ret = -EFAULT;
570                 break;
571         default:
572                 ret = -ENXIO;
573                 break;
574         }
575         return ret;
576 }
577
578 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
579 {
580         int ret;
581         unsigned int idx;
582         switch (attr->attr) {
583         case KVM_S390_VM_MEM_ENABLE_CMMA:
584                 ret = -ENXIO;
585                 if (!sclp.has_cmma)
586                         break;
587
588                 ret = -EBUSY;
589                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
590                 mutex_lock(&kvm->lock);
591                 if (!kvm->created_vcpus) {
592                         kvm->arch.use_cmma = 1;
593                         ret = 0;
594                 }
595                 mutex_unlock(&kvm->lock);
596                 break;
597         case KVM_S390_VM_MEM_CLR_CMMA:
598                 ret = -ENXIO;
599                 if (!sclp.has_cmma)
600                         break;
601                 ret = -EINVAL;
602                 if (!kvm->arch.use_cmma)
603                         break;
604
605                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
606                 mutex_lock(&kvm->lock);
607                 idx = srcu_read_lock(&kvm->srcu);
608                 s390_reset_cmma(kvm->arch.gmap->mm);
609                 srcu_read_unlock(&kvm->srcu, idx);
610                 mutex_unlock(&kvm->lock);
611                 ret = 0;
612                 break;
613         case KVM_S390_VM_MEM_LIMIT_SIZE: {
614                 unsigned long new_limit;
615
616                 if (kvm_is_ucontrol(kvm))
617                         return -EINVAL;
618
619                 if (get_user(new_limit, (u64 __user *)attr->addr))
620                         return -EFAULT;
621
622                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
623                     new_limit > kvm->arch.mem_limit)
624                         return -E2BIG;
625
626                 if (!new_limit)
627                         return -EINVAL;
628
629                 /* gmap_create takes last usable address */
630                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
631                         new_limit -= 1;
632
633                 ret = -EBUSY;
634                 mutex_lock(&kvm->lock);
635                 if (!kvm->created_vcpus) {
636                         /* gmap_create will round the limit up */
637                         struct gmap *new = gmap_create(current->mm, new_limit);
638
639                         if (!new) {
640                                 ret = -ENOMEM;
641                         } else {
642                                 gmap_remove(kvm->arch.gmap);
643                                 new->private = kvm;
644                                 kvm->arch.gmap = new;
645                                 ret = 0;
646                         }
647                 }
648                 mutex_unlock(&kvm->lock);
649                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
650                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
651                          (void *) kvm->arch.gmap->asce);
652                 break;
653         }
654         default:
655                 ret = -ENXIO;
656                 break;
657         }
658         return ret;
659 }
660
661 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
662
663 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
664 {
665         struct kvm_vcpu *vcpu;
666         int i;
667
668         if (!test_kvm_facility(kvm, 76))
669                 return -EINVAL;
670
671         mutex_lock(&kvm->lock);
672         switch (attr->attr) {
673         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
674                 get_random_bytes(
675                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
676                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
677                 kvm->arch.crypto.aes_kw = 1;
678                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
679                 break;
680         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
681                 get_random_bytes(
682                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
683                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
684                 kvm->arch.crypto.dea_kw = 1;
685                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
686                 break;
687         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
688                 kvm->arch.crypto.aes_kw = 0;
689                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
690                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
691                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
692                 break;
693         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
694                 kvm->arch.crypto.dea_kw = 0;
695                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
696                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
697                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
698                 break;
699         default:
700                 mutex_unlock(&kvm->lock);
701                 return -ENXIO;
702         }
703
704         kvm_for_each_vcpu(i, vcpu, kvm) {
705                 kvm_s390_vcpu_crypto_setup(vcpu);
706                 exit_sie(vcpu);
707         }
708         mutex_unlock(&kvm->lock);
709         return 0;
710 }
711
712 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
713 {
714         u8 gtod_high;
715
716         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
717                                            sizeof(gtod_high)))
718                 return -EFAULT;
719
720         if (gtod_high != 0)
721                 return -EINVAL;
722         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
723
724         return 0;
725 }
726
727 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
728 {
729         u64 gtod;
730
731         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
732                 return -EFAULT;
733
734         kvm_s390_set_tod_clock(kvm, gtod);
735         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
736         return 0;
737 }
738
739 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
740 {
741         int ret;
742
743         if (attr->flags)
744                 return -EINVAL;
745
746         switch (attr->attr) {
747         case KVM_S390_VM_TOD_HIGH:
748                 ret = kvm_s390_set_tod_high(kvm, attr);
749                 break;
750         case KVM_S390_VM_TOD_LOW:
751                 ret = kvm_s390_set_tod_low(kvm, attr);
752                 break;
753         default:
754                 ret = -ENXIO;
755                 break;
756         }
757         return ret;
758 }
759
760 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
761 {
762         u8 gtod_high = 0;
763
764         if (copy_to_user((void __user *)attr->addr, &gtod_high,
765                                          sizeof(gtod_high)))
766                 return -EFAULT;
767         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
768
769         return 0;
770 }
771
772 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
773 {
774         u64 gtod;
775
776         gtod = kvm_s390_get_tod_clock_fast(kvm);
777         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
778                 return -EFAULT;
779         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
780
781         return 0;
782 }
783
784 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
785 {
786         int ret;
787
788         if (attr->flags)
789                 return -EINVAL;
790
791         switch (attr->attr) {
792         case KVM_S390_VM_TOD_HIGH:
793                 ret = kvm_s390_get_tod_high(kvm, attr);
794                 break;
795         case KVM_S390_VM_TOD_LOW:
796                 ret = kvm_s390_get_tod_low(kvm, attr);
797                 break;
798         default:
799                 ret = -ENXIO;
800                 break;
801         }
802         return ret;
803 }
804
805 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
806 {
807         struct kvm_s390_vm_cpu_processor *proc;
808         u16 lowest_ibc, unblocked_ibc;
809         int ret = 0;
810
811         mutex_lock(&kvm->lock);
812         if (kvm->created_vcpus) {
813                 ret = -EBUSY;
814                 goto out;
815         }
816         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
817         if (!proc) {
818                 ret = -ENOMEM;
819                 goto out;
820         }
821         if (!copy_from_user(proc, (void __user *)attr->addr,
822                             sizeof(*proc))) {
823                 kvm->arch.model.cpuid = proc->cpuid;
824                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
825                 unblocked_ibc = sclp.ibc & 0xfff;
826                 if (lowest_ibc && proc->ibc) {
827                         if (proc->ibc > unblocked_ibc)
828                                 kvm->arch.model.ibc = unblocked_ibc;
829                         else if (proc->ibc < lowest_ibc)
830                                 kvm->arch.model.ibc = lowest_ibc;
831                         else
832                                 kvm->arch.model.ibc = proc->ibc;
833                 }
834                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
835                        S390_ARCH_FAC_LIST_SIZE_BYTE);
836                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
837                          kvm->arch.model.ibc,
838                          kvm->arch.model.cpuid);
839                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
840                          kvm->arch.model.fac_list[0],
841                          kvm->arch.model.fac_list[1],
842                          kvm->arch.model.fac_list[2]);
843         } else
844                 ret = -EFAULT;
845         kfree(proc);
846 out:
847         mutex_unlock(&kvm->lock);
848         return ret;
849 }
850
851 static int kvm_s390_set_processor_feat(struct kvm *kvm,
852                                        struct kvm_device_attr *attr)
853 {
854         struct kvm_s390_vm_cpu_feat data;
855         int ret = -EBUSY;
856
857         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
858                 return -EFAULT;
859         if (!bitmap_subset((unsigned long *) data.feat,
860                            kvm_s390_available_cpu_feat,
861                            KVM_S390_VM_CPU_FEAT_NR_BITS))
862                 return -EINVAL;
863
864         mutex_lock(&kvm->lock);
865         if (!atomic_read(&kvm->online_vcpus)) {
866                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
867                             KVM_S390_VM_CPU_FEAT_NR_BITS);
868                 ret = 0;
869         }
870         mutex_unlock(&kvm->lock);
871         return ret;
872 }
873
874 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
875                                           struct kvm_device_attr *attr)
876 {
877         /*
878          * Once supported by kernel + hw, we have to store the subfunctions
879          * in kvm->arch and remember that user space configured them.
880          */
881         return -ENXIO;
882 }
883
884 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
885 {
886         int ret = -ENXIO;
887
888         switch (attr->attr) {
889         case KVM_S390_VM_CPU_PROCESSOR:
890                 ret = kvm_s390_set_processor(kvm, attr);
891                 break;
892         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
893                 ret = kvm_s390_set_processor_feat(kvm, attr);
894                 break;
895         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
896                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
897                 break;
898         }
899         return ret;
900 }
901
902 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
903 {
904         struct kvm_s390_vm_cpu_processor *proc;
905         int ret = 0;
906
907         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
908         if (!proc) {
909                 ret = -ENOMEM;
910                 goto out;
911         }
912         proc->cpuid = kvm->arch.model.cpuid;
913         proc->ibc = kvm->arch.model.ibc;
914         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
915                S390_ARCH_FAC_LIST_SIZE_BYTE);
916         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
917                  kvm->arch.model.ibc,
918                  kvm->arch.model.cpuid);
919         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
920                  kvm->arch.model.fac_list[0],
921                  kvm->arch.model.fac_list[1],
922                  kvm->arch.model.fac_list[2]);
923         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
924                 ret = -EFAULT;
925         kfree(proc);
926 out:
927         return ret;
928 }
929
930 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
931 {
932         struct kvm_s390_vm_cpu_machine *mach;
933         int ret = 0;
934
935         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
936         if (!mach) {
937                 ret = -ENOMEM;
938                 goto out;
939         }
940         get_cpu_id((struct cpuid *) &mach->cpuid);
941         mach->ibc = sclp.ibc;
942         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
943                S390_ARCH_FAC_LIST_SIZE_BYTE);
944         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
945                sizeof(S390_lowcore.stfle_fac_list));
946         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
947                  kvm->arch.model.ibc,
948                  kvm->arch.model.cpuid);
949         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
950                  mach->fac_mask[0],
951                  mach->fac_mask[1],
952                  mach->fac_mask[2]);
953         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
954                  mach->fac_list[0],
955                  mach->fac_list[1],
956                  mach->fac_list[2]);
957         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
958                 ret = -EFAULT;
959         kfree(mach);
960 out:
961         return ret;
962 }
963
964 static int kvm_s390_get_processor_feat(struct kvm *kvm,
965                                        struct kvm_device_attr *attr)
966 {
967         struct kvm_s390_vm_cpu_feat data;
968
969         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
970                     KVM_S390_VM_CPU_FEAT_NR_BITS);
971         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
972                 return -EFAULT;
973         return 0;
974 }
975
976 static int kvm_s390_get_machine_feat(struct kvm *kvm,
977                                      struct kvm_device_attr *attr)
978 {
979         struct kvm_s390_vm_cpu_feat data;
980
981         bitmap_copy((unsigned long *) data.feat,
982                     kvm_s390_available_cpu_feat,
983                     KVM_S390_VM_CPU_FEAT_NR_BITS);
984         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
985                 return -EFAULT;
986         return 0;
987 }
988
989 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
990                                           struct kvm_device_attr *attr)
991 {
992         /*
993          * Once we can actually configure subfunctions (kernel + hw support),
994          * we have to check if they were already set by user space, if so copy
995          * them from kvm->arch.
996          */
997         return -ENXIO;
998 }
999
1000 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1001                                         struct kvm_device_attr *attr)
1002 {
1003         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1004             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1005                 return -EFAULT;
1006         return 0;
1007 }
1008 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1009 {
1010         int ret = -ENXIO;
1011
1012         switch (attr->attr) {
1013         case KVM_S390_VM_CPU_PROCESSOR:
1014                 ret = kvm_s390_get_processor(kvm, attr);
1015                 break;
1016         case KVM_S390_VM_CPU_MACHINE:
1017                 ret = kvm_s390_get_machine(kvm, attr);
1018                 break;
1019         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1020                 ret = kvm_s390_get_processor_feat(kvm, attr);
1021                 break;
1022         case KVM_S390_VM_CPU_MACHINE_FEAT:
1023                 ret = kvm_s390_get_machine_feat(kvm, attr);
1024                 break;
1025         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1026                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1027                 break;
1028         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1029                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1030                 break;
1031         }
1032         return ret;
1033 }
1034
1035 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1036 {
1037         int ret;
1038
1039         switch (attr->group) {
1040         case KVM_S390_VM_MEM_CTRL:
1041                 ret = kvm_s390_set_mem_control(kvm, attr);
1042                 break;
1043         case KVM_S390_VM_TOD:
1044                 ret = kvm_s390_set_tod(kvm, attr);
1045                 break;
1046         case KVM_S390_VM_CPU_MODEL:
1047                 ret = kvm_s390_set_cpu_model(kvm, attr);
1048                 break;
1049         case KVM_S390_VM_CRYPTO:
1050                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1051                 break;
1052         default:
1053                 ret = -ENXIO;
1054                 break;
1055         }
1056
1057         return ret;
1058 }
1059
1060 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1061 {
1062         int ret;
1063
1064         switch (attr->group) {
1065         case KVM_S390_VM_MEM_CTRL:
1066                 ret = kvm_s390_get_mem_control(kvm, attr);
1067                 break;
1068         case KVM_S390_VM_TOD:
1069                 ret = kvm_s390_get_tod(kvm, attr);
1070                 break;
1071         case KVM_S390_VM_CPU_MODEL:
1072                 ret = kvm_s390_get_cpu_model(kvm, attr);
1073                 break;
1074         default:
1075                 ret = -ENXIO;
1076                 break;
1077         }
1078
1079         return ret;
1080 }
1081
1082 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1083 {
1084         int ret;
1085
1086         switch (attr->group) {
1087         case KVM_S390_VM_MEM_CTRL:
1088                 switch (attr->attr) {
1089                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1090                 case KVM_S390_VM_MEM_CLR_CMMA:
1091                         ret = sclp.has_cmma ? 0 : -ENXIO;
1092                         break;
1093                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1094                         ret = 0;
1095                         break;
1096                 default:
1097                         ret = -ENXIO;
1098                         break;
1099                 }
1100                 break;
1101         case KVM_S390_VM_TOD:
1102                 switch (attr->attr) {
1103                 case KVM_S390_VM_TOD_LOW:
1104                 case KVM_S390_VM_TOD_HIGH:
1105                         ret = 0;
1106                         break;
1107                 default:
1108                         ret = -ENXIO;
1109                         break;
1110                 }
1111                 break;
1112         case KVM_S390_VM_CPU_MODEL:
1113                 switch (attr->attr) {
1114                 case KVM_S390_VM_CPU_PROCESSOR:
1115                 case KVM_S390_VM_CPU_MACHINE:
1116                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1117                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1118                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1119                         ret = 0;
1120                         break;
1121                 /* configuring subfunctions is not supported yet */
1122                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1123                 default:
1124                         ret = -ENXIO;
1125                         break;
1126                 }
1127                 break;
1128         case KVM_S390_VM_CRYPTO:
1129                 switch (attr->attr) {
1130                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1131                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1132                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1133                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1134                         ret = 0;
1135                         break;
1136                 default:
1137                         ret = -ENXIO;
1138                         break;
1139                 }
1140                 break;
1141         default:
1142                 ret = -ENXIO;
1143                 break;
1144         }
1145
1146         return ret;
1147 }
1148
1149 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1150 {
1151         uint8_t *keys;
1152         uint64_t hva;
1153         int i, r = 0;
1154
1155         if (args->flags != 0)
1156                 return -EINVAL;
1157
1158         /* Is this guest using storage keys? */
1159         if (!mm_use_skey(current->mm))
1160                 return KVM_S390_GET_SKEYS_NONE;
1161
1162         /* Enforce sane limit on memory allocation */
1163         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1164                 return -EINVAL;
1165
1166         keys = kmalloc_array(args->count, sizeof(uint8_t),
1167                              GFP_KERNEL | __GFP_NOWARN);
1168         if (!keys)
1169                 keys = vmalloc(sizeof(uint8_t) * args->count);
1170         if (!keys)
1171                 return -ENOMEM;
1172
1173         down_read(&current->mm->mmap_sem);
1174         for (i = 0; i < args->count; i++) {
1175                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1176                 if (kvm_is_error_hva(hva)) {
1177                         r = -EFAULT;
1178                         break;
1179                 }
1180
1181                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1182                 if (r)
1183                         break;
1184         }
1185         up_read(&current->mm->mmap_sem);
1186
1187         if (!r) {
1188                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1189                                  sizeof(uint8_t) * args->count);
1190                 if (r)
1191                         r = -EFAULT;
1192         }
1193
1194         kvfree(keys);
1195         return r;
1196 }
1197
1198 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1199 {
1200         uint8_t *keys;
1201         uint64_t hva;
1202         int i, r = 0;
1203
1204         if (args->flags != 0)
1205                 return -EINVAL;
1206
1207         /* Enforce sane limit on memory allocation */
1208         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1209                 return -EINVAL;
1210
1211         keys = kmalloc_array(args->count, sizeof(uint8_t),
1212                              GFP_KERNEL | __GFP_NOWARN);
1213         if (!keys)
1214                 keys = vmalloc(sizeof(uint8_t) * args->count);
1215         if (!keys)
1216                 return -ENOMEM;
1217
1218         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1219                            sizeof(uint8_t) * args->count);
1220         if (r) {
1221                 r = -EFAULT;
1222                 goto out;
1223         }
1224
1225         /* Enable storage key handling for the guest */
1226         r = s390_enable_skey();
1227         if (r)
1228                 goto out;
1229
1230         down_read(&current->mm->mmap_sem);
1231         for (i = 0; i < args->count; i++) {
1232                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1233                 if (kvm_is_error_hva(hva)) {
1234                         r = -EFAULT;
1235                         break;
1236                 }
1237
1238                 /* Lowest order bit is reserved */
1239                 if (keys[i] & 0x01) {
1240                         r = -EINVAL;
1241                         break;
1242                 }
1243
1244                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1245                 if (r)
1246                         break;
1247         }
1248         up_read(&current->mm->mmap_sem);
1249 out:
1250         kvfree(keys);
1251         return r;
1252 }
1253
1254 long kvm_arch_vm_ioctl(struct file *filp,
1255                        unsigned int ioctl, unsigned long arg)
1256 {
1257         struct kvm *kvm = filp->private_data;
1258         void __user *argp = (void __user *)arg;
1259         struct kvm_device_attr attr;
1260         int r;
1261
1262         switch (ioctl) {
1263         case KVM_S390_INTERRUPT: {
1264                 struct kvm_s390_interrupt s390int;
1265
1266                 r = -EFAULT;
1267                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1268                         break;
1269                 r = kvm_s390_inject_vm(kvm, &s390int);
1270                 break;
1271         }
1272         case KVM_ENABLE_CAP: {
1273                 struct kvm_enable_cap cap;
1274                 r = -EFAULT;
1275                 if (copy_from_user(&cap, argp, sizeof(cap)))
1276                         break;
1277                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1278                 break;
1279         }
1280         case KVM_CREATE_IRQCHIP: {
1281                 struct kvm_irq_routing_entry routing;
1282
1283                 r = -EINVAL;
1284                 if (kvm->arch.use_irqchip) {
1285                         /* Set up dummy routing. */
1286                         memset(&routing, 0, sizeof(routing));
1287                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1288                 }
1289                 break;
1290         }
1291         case KVM_SET_DEVICE_ATTR: {
1292                 r = -EFAULT;
1293                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1294                         break;
1295                 r = kvm_s390_vm_set_attr(kvm, &attr);
1296                 break;
1297         }
1298         case KVM_GET_DEVICE_ATTR: {
1299                 r = -EFAULT;
1300                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1301                         break;
1302                 r = kvm_s390_vm_get_attr(kvm, &attr);
1303                 break;
1304         }
1305         case KVM_HAS_DEVICE_ATTR: {
1306                 r = -EFAULT;
1307                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1308                         break;
1309                 r = kvm_s390_vm_has_attr(kvm, &attr);
1310                 break;
1311         }
1312         case KVM_S390_GET_SKEYS: {
1313                 struct kvm_s390_skeys args;
1314
1315                 r = -EFAULT;
1316                 if (copy_from_user(&args, argp,
1317                                    sizeof(struct kvm_s390_skeys)))
1318                         break;
1319                 r = kvm_s390_get_skeys(kvm, &args);
1320                 break;
1321         }
1322         case KVM_S390_SET_SKEYS: {
1323                 struct kvm_s390_skeys args;
1324
1325                 r = -EFAULT;
1326                 if (copy_from_user(&args, argp,
1327                                    sizeof(struct kvm_s390_skeys)))
1328                         break;
1329                 r = kvm_s390_set_skeys(kvm, &args);
1330                 break;
1331         }
1332         default:
1333                 r = -ENOTTY;
1334         }
1335
1336         return r;
1337 }
1338
1339 static int kvm_s390_query_ap_config(u8 *config)
1340 {
1341         u32 fcn_code = 0x04000000UL;
1342         u32 cc = 0;
1343
1344         memset(config, 0, 128);
1345         asm volatile(
1346                 "lgr 0,%1\n"
1347                 "lgr 2,%2\n"
1348                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1349                 "0: ipm %0\n"
1350                 "srl %0,28\n"
1351                 "1:\n"
1352                 EX_TABLE(0b, 1b)
1353                 : "+r" (cc)
1354                 : "r" (fcn_code), "r" (config)
1355                 : "cc", "0", "2", "memory"
1356         );
1357
1358         return cc;
1359 }
1360
1361 static int kvm_s390_apxa_installed(void)
1362 {
1363         u8 config[128];
1364         int cc;
1365
1366         if (test_facility(12)) {
1367                 cc = kvm_s390_query_ap_config(config);
1368
1369                 if (cc)
1370                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1371                 else
1372                         return config[0] & 0x40;
1373         }
1374
1375         return 0;
1376 }
1377
1378 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1379 {
1380         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1381
1382         if (kvm_s390_apxa_installed())
1383                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1384         else
1385                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1386 }
1387
1388 static u64 kvm_s390_get_initial_cpuid(void)
1389 {
1390         struct cpuid cpuid;
1391
1392         get_cpu_id(&cpuid);
1393         cpuid.version = 0xff;
1394         return *((u64 *) &cpuid);
1395 }
1396
1397 static void kvm_s390_crypto_init(struct kvm *kvm)
1398 {
1399         if (!test_kvm_facility(kvm, 76))
1400                 return;
1401
1402         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1403         kvm_s390_set_crycb_format(kvm);
1404
1405         /* Enable AES/DEA protected key functions by default */
1406         kvm->arch.crypto.aes_kw = 1;
1407         kvm->arch.crypto.dea_kw = 1;
1408         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1409                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1410         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1411                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1412 }
1413
1414 static void sca_dispose(struct kvm *kvm)
1415 {
1416         if (kvm->arch.use_esca)
1417                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1418         else
1419                 free_page((unsigned long)(kvm->arch.sca));
1420         kvm->arch.sca = NULL;
1421 }
1422
1423 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1424 {
1425         gfp_t alloc_flags = GFP_KERNEL;
1426         int i, rc;
1427         char debug_name[16];
1428         static unsigned long sca_offset;
1429
1430         rc = -EINVAL;
1431 #ifdef CONFIG_KVM_S390_UCONTROL
1432         if (type & ~KVM_VM_S390_UCONTROL)
1433                 goto out_err;
1434         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1435                 goto out_err;
1436 #else
1437         if (type)
1438                 goto out_err;
1439 #endif
1440
1441         rc = s390_enable_sie();
1442         if (rc)
1443                 goto out_err;
1444
1445         rc = -ENOMEM;
1446
1447         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1448
1449         kvm->arch.use_esca = 0; /* start with basic SCA */
1450         if (!sclp.has_64bscao)
1451                 alloc_flags |= GFP_DMA;
1452         rwlock_init(&kvm->arch.sca_lock);
1453         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1454         if (!kvm->arch.sca)
1455                 goto out_err;
1456         spin_lock(&kvm_lock);
1457         sca_offset += 16;
1458         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1459                 sca_offset = 0;
1460         kvm->arch.sca = (struct bsca_block *)
1461                         ((char *) kvm->arch.sca + sca_offset);
1462         spin_unlock(&kvm_lock);
1463
1464         sprintf(debug_name, "kvm-%u", current->pid);
1465
1466         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1467         if (!kvm->arch.dbf)
1468                 goto out_err;
1469
1470         kvm->arch.sie_page2 =
1471              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1472         if (!kvm->arch.sie_page2)
1473                 goto out_err;
1474
1475         /* Populate the facility mask initially. */
1476         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1477                sizeof(S390_lowcore.stfle_fac_list));
1478         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1479                 if (i < kvm_s390_fac_list_mask_size())
1480                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1481                 else
1482                         kvm->arch.model.fac_mask[i] = 0UL;
1483         }
1484
1485         /* Populate the facility list initially. */
1486         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1487         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1488                S390_ARCH_FAC_LIST_SIZE_BYTE);
1489
1490         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1491         set_kvm_facility(kvm->arch.model.fac_list, 74);
1492
1493         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1494         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1495
1496         kvm_s390_crypto_init(kvm);
1497
1498         spin_lock_init(&kvm->arch.float_int.lock);
1499         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1500                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1501         init_waitqueue_head(&kvm->arch.ipte_wq);
1502         mutex_init(&kvm->arch.ipte_mutex);
1503
1504         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1505         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1506
1507         if (type & KVM_VM_S390_UCONTROL) {
1508                 kvm->arch.gmap = NULL;
1509                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1510         } else {
1511                 if (sclp.hamax == U64_MAX)
1512                         kvm->arch.mem_limit = TASK_MAX_SIZE;
1513                 else
1514                         kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1515                                                     sclp.hamax + 1);
1516                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1517                 if (!kvm->arch.gmap)
1518                         goto out_err;
1519                 kvm->arch.gmap->private = kvm;
1520                 kvm->arch.gmap->pfault_enabled = 0;
1521         }
1522
1523         kvm->arch.css_support = 0;
1524         kvm->arch.use_irqchip = 0;
1525         kvm->arch.epoch = 0;
1526
1527         spin_lock_init(&kvm->arch.start_stop_lock);
1528         kvm_s390_vsie_init(kvm);
1529         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1530
1531         return 0;
1532 out_err:
1533         free_page((unsigned long)kvm->arch.sie_page2);
1534         debug_unregister(kvm->arch.dbf);
1535         sca_dispose(kvm);
1536         KVM_EVENT(3, "creation of vm failed: %d", rc);
1537         return rc;
1538 }
1539
1540 bool kvm_arch_has_vcpu_debugfs(void)
1541 {
1542         return false;
1543 }
1544
1545 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1546 {
1547         return 0;
1548 }
1549
1550 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1551 {
1552         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1553         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1554         kvm_s390_clear_local_irqs(vcpu);
1555         kvm_clear_async_pf_completion_queue(vcpu);
1556         if (!kvm_is_ucontrol(vcpu->kvm))
1557                 sca_del_vcpu(vcpu);
1558
1559         if (kvm_is_ucontrol(vcpu->kvm))
1560                 gmap_remove(vcpu->arch.gmap);
1561
1562         if (vcpu->kvm->arch.use_cmma)
1563                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1564         free_page((unsigned long)(vcpu->arch.sie_block));
1565
1566         kvm_vcpu_uninit(vcpu);
1567         kmem_cache_free(kvm_vcpu_cache, vcpu);
1568 }
1569
1570 static void kvm_free_vcpus(struct kvm *kvm)
1571 {
1572         unsigned int i;
1573         struct kvm_vcpu *vcpu;
1574
1575         kvm_for_each_vcpu(i, vcpu, kvm)
1576                 kvm_arch_vcpu_destroy(vcpu);
1577
1578         mutex_lock(&kvm->lock);
1579         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1580                 kvm->vcpus[i] = NULL;
1581
1582         atomic_set(&kvm->online_vcpus, 0);
1583         mutex_unlock(&kvm->lock);
1584 }
1585
1586 void kvm_arch_destroy_vm(struct kvm *kvm)
1587 {
1588         kvm_free_vcpus(kvm);
1589         sca_dispose(kvm);
1590         debug_unregister(kvm->arch.dbf);
1591         free_page((unsigned long)kvm->arch.sie_page2);
1592         if (!kvm_is_ucontrol(kvm))
1593                 gmap_remove(kvm->arch.gmap);
1594         kvm_s390_destroy_adapters(kvm);
1595         kvm_s390_clear_float_irqs(kvm);
1596         kvm_s390_vsie_destroy(kvm);
1597         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1598 }
1599
1600 /* Section: vcpu related */
1601 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1602 {
1603         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1604         if (!vcpu->arch.gmap)
1605                 return -ENOMEM;
1606         vcpu->arch.gmap->private = vcpu->kvm;
1607
1608         return 0;
1609 }
1610
1611 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1612 {
1613         if (!kvm_s390_use_sca_entries())
1614                 return;
1615         read_lock(&vcpu->kvm->arch.sca_lock);
1616         if (vcpu->kvm->arch.use_esca) {
1617                 struct esca_block *sca = vcpu->kvm->arch.sca;
1618
1619                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1620                 sca->cpu[vcpu->vcpu_id].sda = 0;
1621         } else {
1622                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1623
1624                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1625                 sca->cpu[vcpu->vcpu_id].sda = 0;
1626         }
1627         read_unlock(&vcpu->kvm->arch.sca_lock);
1628 }
1629
1630 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1631 {
1632         if (!kvm_s390_use_sca_entries()) {
1633                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1634
1635                 /* we still need the basic sca for the ipte control */
1636                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1637                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1638         }
1639         read_lock(&vcpu->kvm->arch.sca_lock);
1640         if (vcpu->kvm->arch.use_esca) {
1641                 struct esca_block *sca = vcpu->kvm->arch.sca;
1642
1643                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1644                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1645                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1646                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1647                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1648         } else {
1649                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1650
1651                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1652                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1653                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1654                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1655         }
1656         read_unlock(&vcpu->kvm->arch.sca_lock);
1657 }
1658
1659 /* Basic SCA to Extended SCA data copy routines */
1660 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1661 {
1662         d->sda = s->sda;
1663         d->sigp_ctrl.c = s->sigp_ctrl.c;
1664         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1665 }
1666
1667 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1668 {
1669         int i;
1670
1671         d->ipte_control = s->ipte_control;
1672         d->mcn[0] = s->mcn;
1673         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1674                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1675 }
1676
1677 static int sca_switch_to_extended(struct kvm *kvm)
1678 {
1679         struct bsca_block *old_sca = kvm->arch.sca;
1680         struct esca_block *new_sca;
1681         struct kvm_vcpu *vcpu;
1682         unsigned int vcpu_idx;
1683         u32 scaol, scaoh;
1684
1685         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1686         if (!new_sca)
1687                 return -ENOMEM;
1688
1689         scaoh = (u32)((u64)(new_sca) >> 32);
1690         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1691
1692         kvm_s390_vcpu_block_all(kvm);
1693         write_lock(&kvm->arch.sca_lock);
1694
1695         sca_copy_b_to_e(new_sca, old_sca);
1696
1697         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1698                 vcpu->arch.sie_block->scaoh = scaoh;
1699                 vcpu->arch.sie_block->scaol = scaol;
1700                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1701         }
1702         kvm->arch.sca = new_sca;
1703         kvm->arch.use_esca = 1;
1704
1705         write_unlock(&kvm->arch.sca_lock);
1706         kvm_s390_vcpu_unblock_all(kvm);
1707
1708         free_page((unsigned long)old_sca);
1709
1710         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1711                  old_sca, kvm->arch.sca);
1712         return 0;
1713 }
1714
1715 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1716 {
1717         int rc;
1718
1719         if (!kvm_s390_use_sca_entries()) {
1720                 if (id < KVM_MAX_VCPUS)
1721                         return true;
1722                 return false;
1723         }
1724         if (id < KVM_S390_BSCA_CPU_SLOTS)
1725                 return true;
1726         if (!sclp.has_esca || !sclp.has_64bscao)
1727                 return false;
1728
1729         mutex_lock(&kvm->lock);
1730         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1731         mutex_unlock(&kvm->lock);
1732
1733         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1734 }
1735
1736 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1737 {
1738         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1739         kvm_clear_async_pf_completion_queue(vcpu);
1740         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1741                                     KVM_SYNC_GPRS |
1742                                     KVM_SYNC_ACRS |
1743                                     KVM_SYNC_CRS |
1744                                     KVM_SYNC_ARCH0 |
1745                                     KVM_SYNC_PFAULT;
1746         kvm_s390_set_prefix(vcpu, 0);
1747         if (test_kvm_facility(vcpu->kvm, 64))
1748                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1749         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1750          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1751          */
1752         if (MACHINE_HAS_VX)
1753                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1754         else
1755                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1756
1757         if (kvm_is_ucontrol(vcpu->kvm))
1758                 return __kvm_ucontrol_vcpu_init(vcpu);
1759
1760         return 0;
1761 }
1762
1763 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1764 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1765 {
1766         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1767         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1768         vcpu->arch.cputm_start = get_tod_clock_fast();
1769         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1770 }
1771
1772 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1773 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1774 {
1775         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1776         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1777         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1778         vcpu->arch.cputm_start = 0;
1779         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1780 }
1781
1782 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1783 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1784 {
1785         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1786         vcpu->arch.cputm_enabled = true;
1787         __start_cpu_timer_accounting(vcpu);
1788 }
1789
1790 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1791 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1792 {
1793         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1794         __stop_cpu_timer_accounting(vcpu);
1795         vcpu->arch.cputm_enabled = false;
1796 }
1797
1798 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1799 {
1800         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1801         __enable_cpu_timer_accounting(vcpu);
1802         preempt_enable();
1803 }
1804
1805 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1806 {
1807         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1808         __disable_cpu_timer_accounting(vcpu);
1809         preempt_enable();
1810 }
1811
1812 /* set the cpu timer - may only be called from the VCPU thread itself */
1813 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1814 {
1815         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1816         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1817         if (vcpu->arch.cputm_enabled)
1818                 vcpu->arch.cputm_start = get_tod_clock_fast();
1819         vcpu->arch.sie_block->cputm = cputm;
1820         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1821         preempt_enable();
1822 }
1823
1824 /* update and get the cpu timer - can also be called from other VCPU threads */
1825 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1826 {
1827         unsigned int seq;
1828         __u64 value;
1829
1830         if (unlikely(!vcpu->arch.cputm_enabled))
1831                 return vcpu->arch.sie_block->cputm;
1832
1833         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1834         do {
1835                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1836                 /*
1837                  * If the writer would ever execute a read in the critical
1838                  * section, e.g. in irq context, we have a deadlock.
1839                  */
1840                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1841                 value = vcpu->arch.sie_block->cputm;
1842                 /* if cputm_start is 0, accounting is being started/stopped */
1843                 if (likely(vcpu->arch.cputm_start))
1844                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1845         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1846         preempt_enable();
1847         return value;
1848 }
1849
1850 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1851 {
1852
1853         gmap_enable(vcpu->arch.enabled_gmap);
1854         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1855         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1856                 __start_cpu_timer_accounting(vcpu);
1857         vcpu->cpu = cpu;
1858 }
1859
1860 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1861 {
1862         vcpu->cpu = -1;
1863         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1864                 __stop_cpu_timer_accounting(vcpu);
1865         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1866         vcpu->arch.enabled_gmap = gmap_get_enabled();
1867         gmap_disable(vcpu->arch.enabled_gmap);
1868
1869 }
1870
1871 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1872 {
1873         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1874         vcpu->arch.sie_block->gpsw.mask = 0UL;
1875         vcpu->arch.sie_block->gpsw.addr = 0UL;
1876         kvm_s390_set_prefix(vcpu, 0);
1877         kvm_s390_set_cpu_timer(vcpu, 0);
1878         vcpu->arch.sie_block->ckc       = 0UL;
1879         vcpu->arch.sie_block->todpr     = 0;
1880         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1881         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1882         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1883         /* make sure the new fpc will be lazily loaded */
1884         save_fpu_regs();
1885         current->thread.fpu.fpc = 0;
1886         vcpu->arch.sie_block->gbea = 1;
1887         vcpu->arch.sie_block->pp = 0;
1888         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1889         kvm_clear_async_pf_completion_queue(vcpu);
1890         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1891                 kvm_s390_vcpu_stop(vcpu);
1892         kvm_s390_clear_local_irqs(vcpu);
1893 }
1894
1895 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1896 {
1897         mutex_lock(&vcpu->kvm->lock);
1898         preempt_disable();
1899         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1900         preempt_enable();
1901         mutex_unlock(&vcpu->kvm->lock);
1902         if (!kvm_is_ucontrol(vcpu->kvm)) {
1903                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1904                 sca_add_vcpu(vcpu);
1905         }
1906         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1907                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1908         /* make vcpu_load load the right gmap on the first trigger */
1909         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1910 }
1911
1912 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1913 {
1914         if (!test_kvm_facility(vcpu->kvm, 76))
1915                 return;
1916
1917         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1918
1919         if (vcpu->kvm->arch.crypto.aes_kw)
1920                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1921         if (vcpu->kvm->arch.crypto.dea_kw)
1922                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1923
1924         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1925 }
1926
1927 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1928 {
1929         free_page(vcpu->arch.sie_block->cbrlo);
1930         vcpu->arch.sie_block->cbrlo = 0;
1931 }
1932
1933 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1934 {
1935         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1936         if (!vcpu->arch.sie_block->cbrlo)
1937                 return -ENOMEM;
1938
1939         vcpu->arch.sie_block->ecb2 |= 0x80;
1940         vcpu->arch.sie_block->ecb2 &= ~0x08;
1941         return 0;
1942 }
1943
1944 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1945 {
1946         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1947
1948         vcpu->arch.sie_block->ibc = model->ibc;
1949         if (test_kvm_facility(vcpu->kvm, 7))
1950                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1951 }
1952
1953 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1954 {
1955         int rc = 0;
1956
1957         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1958                                                     CPUSTAT_SM |
1959                                                     CPUSTAT_STOPPED);
1960
1961         if (test_kvm_facility(vcpu->kvm, 78))
1962                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1963         else if (test_kvm_facility(vcpu->kvm, 8))
1964                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1965
1966         kvm_s390_vcpu_setup_model(vcpu);
1967
1968         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1969         if (MACHINE_HAS_ESOP)
1970                 vcpu->arch.sie_block->ecb |= 0x02;
1971         if (test_kvm_facility(vcpu->kvm, 9))
1972                 vcpu->arch.sie_block->ecb |= 0x04;
1973         if (test_kvm_facility(vcpu->kvm, 73))
1974                 vcpu->arch.sie_block->ecb |= 0x10;
1975
1976         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1977                 vcpu->arch.sie_block->ecb2 |= 0x08;
1978         if (test_kvm_facility(vcpu->kvm, 130))
1979                 vcpu->arch.sie_block->ecb2 |= 0x20;
1980         vcpu->arch.sie_block->eca = 0x1002000U;
1981         if (sclp.has_cei)
1982                 vcpu->arch.sie_block->eca |= 0x80000000U;
1983         if (sclp.has_ib)
1984                 vcpu->arch.sie_block->eca |= 0x40000000U;
1985         if (sclp.has_siif)
1986                 vcpu->arch.sie_block->eca |= 1;
1987         if (sclp.has_sigpif)
1988                 vcpu->arch.sie_block->eca |= 0x10000000U;
1989         if (test_kvm_facility(vcpu->kvm, 129)) {
1990                 vcpu->arch.sie_block->eca |= 0x00020000;
1991                 vcpu->arch.sie_block->ecd |= 0x20000000;
1992         }
1993         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1994         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1995
1996         if (vcpu->kvm->arch.use_cmma) {
1997                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1998                 if (rc)
1999                         return rc;
2000         }
2001         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2002         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2003
2004         kvm_s390_vcpu_crypto_setup(vcpu);
2005
2006         return rc;
2007 }
2008
2009 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2010                                       unsigned int id)
2011 {
2012         struct kvm_vcpu *vcpu;
2013         struct sie_page *sie_page;
2014         int rc = -EINVAL;
2015
2016         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2017                 goto out;
2018
2019         rc = -ENOMEM;
2020
2021         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2022         if (!vcpu)
2023                 goto out;
2024
2025         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2026         if (!sie_page)
2027                 goto out_free_cpu;
2028
2029         vcpu->arch.sie_block = &sie_page->sie_block;
2030         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2031
2032         /* the real guest size will always be smaller than msl */
2033         vcpu->arch.sie_block->mso = 0;
2034         vcpu->arch.sie_block->msl = sclp.hamax;
2035
2036         vcpu->arch.sie_block->icpua = id;
2037         spin_lock_init(&vcpu->arch.local_int.lock);
2038         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2039         vcpu->arch.local_int.wq = &vcpu->wq;
2040         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2041         seqcount_init(&vcpu->arch.cputm_seqcount);
2042
2043         rc = kvm_vcpu_init(vcpu, kvm, id);
2044         if (rc)
2045                 goto out_free_sie_block;
2046         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2047                  vcpu->arch.sie_block);
2048         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2049
2050         return vcpu;
2051 out_free_sie_block:
2052         free_page((unsigned long)(vcpu->arch.sie_block));
2053 out_free_cpu:
2054         kmem_cache_free(kvm_vcpu_cache, vcpu);
2055 out:
2056         return ERR_PTR(rc);
2057 }
2058
2059 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2060 {
2061         return kvm_s390_vcpu_has_irq(vcpu, 0);
2062 }
2063
2064 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2065 {
2066         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2067         exit_sie(vcpu);
2068 }
2069
2070 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2071 {
2072         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2073 }
2074
2075 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2076 {
2077         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2078         exit_sie(vcpu);
2079 }
2080
2081 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2082 {
2083         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2084 }
2085
2086 /*
2087  * Kick a guest cpu out of SIE and wait until SIE is not running.
2088  * If the CPU is not running (e.g. waiting as idle) the function will
2089  * return immediately. */
2090 void exit_sie(struct kvm_vcpu *vcpu)
2091 {
2092         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2093         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2094                 cpu_relax();
2095 }
2096
2097 /* Kick a guest cpu out of SIE to process a request synchronously */
2098 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2099 {
2100         kvm_make_request(req, vcpu);
2101         kvm_s390_vcpu_request(vcpu);
2102 }
2103
2104 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2105                               unsigned long end)
2106 {
2107         struct kvm *kvm = gmap->private;
2108         struct kvm_vcpu *vcpu;
2109         unsigned long prefix;
2110         int i;
2111
2112         if (gmap_is_shadow(gmap))
2113                 return;
2114         if (start >= 1UL << 31)
2115                 /* We are only interested in prefix pages */
2116                 return;
2117         kvm_for_each_vcpu(i, vcpu, kvm) {
2118                 /* match against both prefix pages */
2119                 prefix = kvm_s390_get_prefix(vcpu);
2120                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2121                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2122                                    start, end);
2123                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2124                 }
2125         }
2126 }
2127
2128 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2129 {
2130         /* kvm common code refers to this, but never calls it */
2131         BUG();
2132         return 0;
2133 }
2134
2135 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2136                                            struct kvm_one_reg *reg)
2137 {
2138         int r = -EINVAL;
2139
2140         switch (reg->id) {
2141         case KVM_REG_S390_TODPR:
2142                 r = put_user(vcpu->arch.sie_block->todpr,
2143                              (u32 __user *)reg->addr);
2144                 break;
2145         case KVM_REG_S390_EPOCHDIFF:
2146                 r = put_user(vcpu->arch.sie_block->epoch,
2147                              (u64 __user *)reg->addr);
2148                 break;
2149         case KVM_REG_S390_CPU_TIMER:
2150                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2151                              (u64 __user *)reg->addr);
2152                 break;
2153         case KVM_REG_S390_CLOCK_COMP:
2154                 r = put_user(vcpu->arch.sie_block->ckc,
2155                              (u64 __user *)reg->addr);
2156                 break;
2157         case KVM_REG_S390_PFTOKEN:
2158                 r = put_user(vcpu->arch.pfault_token,
2159                              (u64 __user *)reg->addr);
2160                 break;
2161         case KVM_REG_S390_PFCOMPARE:
2162                 r = put_user(vcpu->arch.pfault_compare,
2163                              (u64 __user *)reg->addr);
2164                 break;
2165         case KVM_REG_S390_PFSELECT:
2166                 r = put_user(vcpu->arch.pfault_select,
2167                              (u64 __user *)reg->addr);
2168                 break;
2169         case KVM_REG_S390_PP:
2170                 r = put_user(vcpu->arch.sie_block->pp,
2171                              (u64 __user *)reg->addr);
2172                 break;
2173         case KVM_REG_S390_GBEA:
2174                 r = put_user(vcpu->arch.sie_block->gbea,
2175                              (u64 __user *)reg->addr);
2176                 break;
2177         default:
2178                 break;
2179         }
2180
2181         return r;
2182 }
2183
2184 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2185                                            struct kvm_one_reg *reg)
2186 {
2187         int r = -EINVAL;
2188         __u64 val;
2189
2190         switch (reg->id) {
2191         case KVM_REG_S390_TODPR:
2192                 r = get_user(vcpu->arch.sie_block->todpr,
2193                              (u32 __user *)reg->addr);
2194                 break;
2195         case KVM_REG_S390_EPOCHDIFF:
2196                 r = get_user(vcpu->arch.sie_block->epoch,
2197                              (u64 __user *)reg->addr);
2198                 break;
2199         case KVM_REG_S390_CPU_TIMER:
2200                 r = get_user(val, (u64 __user *)reg->addr);
2201                 if (!r)
2202                         kvm_s390_set_cpu_timer(vcpu, val);
2203                 break;
2204         case KVM_REG_S390_CLOCK_COMP:
2205                 r = get_user(vcpu->arch.sie_block->ckc,
2206                              (u64 __user *)reg->addr);
2207                 break;
2208         case KVM_REG_S390_PFTOKEN:
2209                 r = get_user(vcpu->arch.pfault_token,
2210                              (u64 __user *)reg->addr);
2211                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2212                         kvm_clear_async_pf_completion_queue(vcpu);
2213                 break;
2214         case KVM_REG_S390_PFCOMPARE:
2215                 r = get_user(vcpu->arch.pfault_compare,
2216                              (u64 __user *)reg->addr);
2217                 break;
2218         case KVM_REG_S390_PFSELECT:
2219                 r = get_user(vcpu->arch.pfault_select,
2220                              (u64 __user *)reg->addr);
2221                 break;
2222         case KVM_REG_S390_PP:
2223                 r = get_user(vcpu->arch.sie_block->pp,
2224                              (u64 __user *)reg->addr);
2225                 break;
2226         case KVM_REG_S390_GBEA:
2227                 r = get_user(vcpu->arch.sie_block->gbea,
2228                              (u64 __user *)reg->addr);
2229                 break;
2230         default:
2231                 break;
2232         }
2233
2234         return r;
2235 }
2236
2237 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2238 {
2239         kvm_s390_vcpu_initial_reset(vcpu);
2240         return 0;
2241 }
2242
2243 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2244 {
2245         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2246         return 0;
2247 }
2248
2249 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2250 {
2251         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2252         return 0;
2253 }
2254
2255 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2256                                   struct kvm_sregs *sregs)
2257 {
2258         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2259         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2260         return 0;
2261 }
2262
2263 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2264                                   struct kvm_sregs *sregs)
2265 {
2266         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2267         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2268         return 0;
2269 }
2270
2271 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2272 {
2273         if (test_fp_ctl(fpu->fpc))
2274                 return -EINVAL;
2275         vcpu->run->s.regs.fpc = fpu->fpc;
2276         if (MACHINE_HAS_VX)
2277                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2278                                  (freg_t *) fpu->fprs);
2279         else
2280                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2281         return 0;
2282 }
2283
2284 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2285 {
2286         /* make sure we have the latest values */
2287         save_fpu_regs();
2288         if (MACHINE_HAS_VX)
2289                 convert_vx_to_fp((freg_t *) fpu->fprs,
2290                                  (__vector128 *) vcpu->run->s.regs.vrs);
2291         else
2292                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2293         fpu->fpc = vcpu->run->s.regs.fpc;
2294         return 0;
2295 }
2296
2297 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2298 {
2299         int rc = 0;
2300
2301         if (!is_vcpu_stopped(vcpu))
2302                 rc = -EBUSY;
2303         else {
2304                 vcpu->run->psw_mask = psw.mask;
2305                 vcpu->run->psw_addr = psw.addr;
2306         }
2307         return rc;
2308 }
2309
2310 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2311                                   struct kvm_translation *tr)
2312 {
2313         return -EINVAL; /* not implemented yet */
2314 }
2315
2316 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2317                               KVM_GUESTDBG_USE_HW_BP | \
2318                               KVM_GUESTDBG_ENABLE)
2319
2320 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2321                                         struct kvm_guest_debug *dbg)
2322 {
2323         int rc = 0;
2324
2325         vcpu->guest_debug = 0;
2326         kvm_s390_clear_bp_data(vcpu);
2327
2328         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2329                 return -EINVAL;
2330         if (!sclp.has_gpere)
2331                 return -EINVAL;
2332
2333         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2334                 vcpu->guest_debug = dbg->control;
2335                 /* enforce guest PER */
2336                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2337
2338                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2339                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2340         } else {
2341                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2342                 vcpu->arch.guestdbg.last_bp = 0;
2343         }
2344
2345         if (rc) {
2346                 vcpu->guest_debug = 0;
2347                 kvm_s390_clear_bp_data(vcpu);
2348                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2349         }
2350
2351         return rc;
2352 }
2353
2354 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2355                                     struct kvm_mp_state *mp_state)
2356 {
2357         /* CHECK_STOP and LOAD are not supported yet */
2358         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2359                                        KVM_MP_STATE_OPERATING;
2360 }
2361
2362 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2363                                     struct kvm_mp_state *mp_state)
2364 {
2365         int rc = 0;
2366
2367         /* user space knows about this interface - let it control the state */
2368         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2369
2370         switch (mp_state->mp_state) {
2371         case KVM_MP_STATE_STOPPED:
2372                 kvm_s390_vcpu_stop(vcpu);
2373                 break;
2374         case KVM_MP_STATE_OPERATING:
2375                 kvm_s390_vcpu_start(vcpu);
2376                 break;
2377         case KVM_MP_STATE_LOAD:
2378         case KVM_MP_STATE_CHECK_STOP:
2379                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2380         default:
2381                 rc = -ENXIO;
2382         }
2383
2384         return rc;
2385 }
2386
2387 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2388 {
2389         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2390 }
2391
2392 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2393 {
2394 retry:
2395         kvm_s390_vcpu_request_handled(vcpu);
2396         if (!vcpu->requests)
2397                 return 0;
2398         /*
2399          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2400          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2401          * This ensures that the ipte instruction for this request has
2402          * already finished. We might race against a second unmapper that
2403          * wants to set the blocking bit. Lets just retry the request loop.
2404          */
2405         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2406                 int rc;
2407                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2408                                           kvm_s390_get_prefix(vcpu),
2409                                           PAGE_SIZE * 2, PROT_WRITE);
2410                 if (rc) {
2411                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2412                         return rc;
2413                 }
2414                 goto retry;
2415         }
2416
2417         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2418                 vcpu->arch.sie_block->ihcpu = 0xffff;
2419                 goto retry;
2420         }
2421
2422         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2423                 if (!ibs_enabled(vcpu)) {
2424                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2425                         atomic_or(CPUSTAT_IBS,
2426                                         &vcpu->arch.sie_block->cpuflags);
2427                 }
2428                 goto retry;
2429         }
2430
2431         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2432                 if (ibs_enabled(vcpu)) {
2433                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2434                         atomic_andnot(CPUSTAT_IBS,
2435                                           &vcpu->arch.sie_block->cpuflags);
2436                 }
2437                 goto retry;
2438         }
2439
2440         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2441                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2442                 goto retry;
2443         }
2444
2445         /* nothing to do, just clear the request */
2446         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2447
2448         return 0;
2449 }
2450
2451 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2452 {
2453         struct kvm_vcpu *vcpu;
2454         int i;
2455
2456         mutex_lock(&kvm->lock);
2457         preempt_disable();
2458         kvm->arch.epoch = tod - get_tod_clock();
2459         kvm_s390_vcpu_block_all(kvm);
2460         kvm_for_each_vcpu(i, vcpu, kvm)
2461                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2462         kvm_s390_vcpu_unblock_all(kvm);
2463         preempt_enable();
2464         mutex_unlock(&kvm->lock);
2465 }
2466
2467 /**
2468  * kvm_arch_fault_in_page - fault-in guest page if necessary
2469  * @vcpu: The corresponding virtual cpu
2470  * @gpa: Guest physical address
2471  * @writable: Whether the page should be writable or not
2472  *
2473  * Make sure that a guest page has been faulted-in on the host.
2474  *
2475  * Return: Zero on success, negative error code otherwise.
2476  */
2477 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2478 {
2479         return gmap_fault(vcpu->arch.gmap, gpa,
2480                           writable ? FAULT_FLAG_WRITE : 0);
2481 }
2482
2483 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2484                                       unsigned long token)
2485 {
2486         struct kvm_s390_interrupt inti;
2487         struct kvm_s390_irq irq;
2488
2489         if (start_token) {
2490                 irq.u.ext.ext_params2 = token;
2491                 irq.type = KVM_S390_INT_PFAULT_INIT;
2492                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2493         } else {
2494                 inti.type = KVM_S390_INT_PFAULT_DONE;
2495                 inti.parm64 = token;
2496                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2497         }
2498 }
2499
2500 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2501                                      struct kvm_async_pf *work)
2502 {
2503         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2504         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2505 }
2506
2507 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2508                                  struct kvm_async_pf *work)
2509 {
2510         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2511         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2512 }
2513
2514 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2515                                struct kvm_async_pf *work)
2516 {
2517         /* s390 will always inject the page directly */
2518 }
2519
2520 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2521 {
2522         /*
2523          * s390 will always inject the page directly,
2524          * but we still want check_async_completion to cleanup
2525          */
2526         return true;
2527 }
2528
2529 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2530 {
2531         hva_t hva;
2532         struct kvm_arch_async_pf arch;
2533         int rc;
2534
2535         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2536                 return 0;
2537         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2538             vcpu->arch.pfault_compare)
2539                 return 0;
2540         if (psw_extint_disabled(vcpu))
2541                 return 0;
2542         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2543                 return 0;
2544         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2545                 return 0;
2546         if (!vcpu->arch.gmap->pfault_enabled)
2547                 return 0;
2548
2549         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2550         hva += current->thread.gmap_addr & ~PAGE_MASK;
2551         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2552                 return 0;
2553
2554         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2555         return rc;
2556 }
2557
2558 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2559 {
2560         int rc, cpuflags;
2561
2562         /*
2563          * On s390 notifications for arriving pages will be delivered directly
2564          * to the guest but the house keeping for completed pfaults is
2565          * handled outside the worker.
2566          */
2567         kvm_check_async_pf_completion(vcpu);
2568
2569         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2570         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2571
2572         if (need_resched())
2573                 schedule();
2574
2575         if (test_cpu_flag(CIF_MCCK_PENDING))
2576                 s390_handle_mcck();
2577
2578         if (!kvm_is_ucontrol(vcpu->kvm)) {
2579                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2580                 if (rc)
2581                         return rc;
2582         }
2583
2584         rc = kvm_s390_handle_requests(vcpu);
2585         if (rc)
2586                 return rc;
2587
2588         if (guestdbg_enabled(vcpu)) {
2589                 kvm_s390_backup_guest_per_regs(vcpu);
2590                 kvm_s390_patch_guest_per_regs(vcpu);
2591         }
2592
2593         vcpu->arch.sie_block->icptcode = 0;
2594         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2595         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2596         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2597
2598         return 0;
2599 }
2600
2601 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2602 {
2603         struct kvm_s390_pgm_info pgm_info = {
2604                 .code = PGM_ADDRESSING,
2605         };
2606         u8 opcode, ilen;
2607         int rc;
2608
2609         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2610         trace_kvm_s390_sie_fault(vcpu);
2611
2612         /*
2613          * We want to inject an addressing exception, which is defined as a
2614          * suppressing or terminating exception. However, since we came here
2615          * by a DAT access exception, the PSW still points to the faulting
2616          * instruction since DAT exceptions are nullifying. So we've got
2617          * to look up the current opcode to get the length of the instruction
2618          * to be able to forward the PSW.
2619          */
2620         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
2621         ilen = insn_length(opcode);
2622         if (rc < 0) {
2623                 return rc;
2624         } else if (rc) {
2625                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2626                  * Forward by arbitrary ilc, injection will take care of
2627                  * nullification if necessary.
2628                  */
2629                 pgm_info = vcpu->arch.pgm;
2630                 ilen = 4;
2631         }
2632         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2633         kvm_s390_forward_psw(vcpu, ilen);
2634         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2635 }
2636
2637 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2638 {
2639         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2640                    vcpu->arch.sie_block->icptcode);
2641         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2642
2643         if (guestdbg_enabled(vcpu))
2644                 kvm_s390_restore_guest_per_regs(vcpu);
2645
2646         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2647         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2648
2649         if (vcpu->arch.sie_block->icptcode > 0) {
2650                 int rc = kvm_handle_sie_intercept(vcpu);
2651
2652                 if (rc != -EOPNOTSUPP)
2653                         return rc;
2654                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2655                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2656                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2657                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2658                 return -EREMOTE;
2659         } else if (exit_reason != -EFAULT) {
2660                 vcpu->stat.exit_null++;
2661                 return 0;
2662         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2663                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2664                 vcpu->run->s390_ucontrol.trans_exc_code =
2665                                                 current->thread.gmap_addr;
2666                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2667                 return -EREMOTE;
2668         } else if (current->thread.gmap_pfault) {
2669                 trace_kvm_s390_major_guest_pfault(vcpu);
2670                 current->thread.gmap_pfault = 0;
2671                 if (kvm_arch_setup_async_pf(vcpu))
2672                         return 0;
2673                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2674         }
2675         return vcpu_post_run_fault_in_sie(vcpu);
2676 }
2677
2678 static int __vcpu_run(struct kvm_vcpu *vcpu)
2679 {
2680         int rc, exit_reason;
2681
2682         /*
2683          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2684          * ning the guest), so that memslots (and other stuff) are protected
2685          */
2686         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2687
2688         do {
2689                 rc = vcpu_pre_run(vcpu);
2690                 if (rc)
2691                         break;
2692
2693                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2694                 /*
2695                  * As PF_VCPU will be used in fault handler, between
2696                  * guest_enter and guest_exit should be no uaccess.
2697                  */
2698                 local_irq_disable();
2699                 guest_enter_irqoff();
2700                 __disable_cpu_timer_accounting(vcpu);
2701                 local_irq_enable();
2702                 exit_reason = sie64a(vcpu->arch.sie_block,
2703                                      vcpu->run->s.regs.gprs);
2704                 local_irq_disable();
2705                 __enable_cpu_timer_accounting(vcpu);
2706                 guest_exit_irqoff();
2707                 local_irq_enable();
2708                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2709
2710                 rc = vcpu_post_run(vcpu, exit_reason);
2711         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2712
2713         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2714         return rc;
2715 }
2716
2717 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2718 {
2719         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2720         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2721         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2722                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2723         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2724                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2725                 /* some control register changes require a tlb flush */
2726                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2727         }
2728         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2729                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2730                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2731                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2732                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2733                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2734         }
2735         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2736                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2737                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2738                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2739                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2740                         kvm_clear_async_pf_completion_queue(vcpu);
2741         }
2742         /*
2743          * If userspace sets the riccb (e.g. after migration) to a valid state,
2744          * we should enable RI here instead of doing the lazy enablement.
2745          */
2746         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2747             test_kvm_facility(vcpu->kvm, 64)) {
2748                 struct runtime_instr_cb *riccb =
2749                         (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2750
2751                 if (riccb->valid)
2752                         vcpu->arch.sie_block->ecb3 |= 0x01;
2753         }
2754         save_access_regs(vcpu->arch.host_acrs);
2755         restore_access_regs(vcpu->run->s.regs.acrs);
2756         /* save host (userspace) fprs/vrs */
2757         save_fpu_regs();
2758         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2759         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2760         if (MACHINE_HAS_VX)
2761                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2762         else
2763                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2764         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2765         if (test_fp_ctl(current->thread.fpu.fpc))
2766                 /* User space provided an invalid FPC, let's clear it */
2767                 current->thread.fpu.fpc = 0;
2768
2769         kvm_run->kvm_dirty_regs = 0;
2770 }
2771
2772 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2773 {
2774         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2775         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2776         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2777         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2778         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2779         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2780         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2781         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2782         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2783         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2784         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2785         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2786         save_access_regs(vcpu->run->s.regs.acrs);
2787         restore_access_regs(vcpu->arch.host_acrs);
2788         /* Save guest register state */
2789         save_fpu_regs();
2790         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2791         /* Restore will be done lazily at return */
2792         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2793         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2794
2795 }
2796
2797 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2798 {
2799         int rc;
2800         sigset_t sigsaved;
2801
2802         if (kvm_run->immediate_exit)
2803                 return -EINTR;
2804
2805         if (guestdbg_exit_pending(vcpu)) {
2806                 kvm_s390_prepare_debug_exit(vcpu);
2807                 return 0;
2808         }
2809
2810         if (vcpu->sigset_active)
2811                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2812
2813         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2814                 kvm_s390_vcpu_start(vcpu);
2815         } else if (is_vcpu_stopped(vcpu)) {
2816                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2817                                    vcpu->vcpu_id);
2818                 return -EINVAL;
2819         }
2820
2821         sync_regs(vcpu, kvm_run);
2822         enable_cpu_timer_accounting(vcpu);
2823
2824         might_fault();
2825         rc = __vcpu_run(vcpu);
2826
2827         if (signal_pending(current) && !rc) {
2828                 kvm_run->exit_reason = KVM_EXIT_INTR;
2829                 rc = -EINTR;
2830         }
2831
2832         if (guestdbg_exit_pending(vcpu) && !rc)  {
2833                 kvm_s390_prepare_debug_exit(vcpu);
2834                 rc = 0;
2835         }
2836
2837         if (rc == -EREMOTE) {
2838                 /* userspace support is needed, kvm_run has been prepared */
2839                 rc = 0;
2840         }
2841
2842         disable_cpu_timer_accounting(vcpu);
2843         store_regs(vcpu, kvm_run);
2844
2845         if (vcpu->sigset_active)
2846                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2847
2848         vcpu->stat.exit_userspace++;
2849         return rc;
2850 }
2851
2852 /*
2853  * store status at address
2854  * we use have two special cases:
2855  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2856  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2857  */
2858 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2859 {
2860         unsigned char archmode = 1;
2861         freg_t fprs[NUM_FPRS];
2862         unsigned int px;
2863         u64 clkcomp, cputm;
2864         int rc;
2865
2866         px = kvm_s390_get_prefix(vcpu);
2867         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2868                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2869                         return -EFAULT;
2870                 gpa = 0;
2871         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2872                 if (write_guest_real(vcpu, 163, &archmode, 1))
2873                         return -EFAULT;
2874                 gpa = px;
2875         } else
2876                 gpa -= __LC_FPREGS_SAVE_AREA;
2877
2878         /* manually convert vector registers if necessary */
2879         if (MACHINE_HAS_VX) {
2880                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2881                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2882                                      fprs, 128);
2883         } else {
2884                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2885                                      vcpu->run->s.regs.fprs, 128);
2886         }
2887         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2888                               vcpu->run->s.regs.gprs, 128);
2889         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2890                               &vcpu->arch.sie_block->gpsw, 16);
2891         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2892                               &px, 4);
2893         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2894                               &vcpu->run->s.regs.fpc, 4);
2895         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2896                               &vcpu->arch.sie_block->todpr, 4);
2897         cputm = kvm_s390_get_cpu_timer(vcpu);
2898         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2899                               &cputm, 8);
2900         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2901         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2902                               &clkcomp, 8);
2903         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2904                               &vcpu->run->s.regs.acrs, 64);
2905         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2906                               &vcpu->arch.sie_block->gcr, 128);
2907         return rc ? -EFAULT : 0;
2908 }
2909
2910 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2911 {
2912         /*
2913          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2914          * switch in the run ioctl. Let's update our copies before we save
2915          * it into the save area
2916          */
2917         save_fpu_regs();
2918         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2919         save_access_regs(vcpu->run->s.regs.acrs);
2920
2921         return kvm_s390_store_status_unloaded(vcpu, addr);
2922 }
2923
2924 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2925 {
2926         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2927         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2928 }
2929
2930 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2931 {
2932         unsigned int i;
2933         struct kvm_vcpu *vcpu;
2934
2935         kvm_for_each_vcpu(i, vcpu, kvm) {
2936                 __disable_ibs_on_vcpu(vcpu);
2937         }
2938 }
2939
2940 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2941 {
2942         if (!sclp.has_ibs)
2943                 return;
2944         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2945         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2946 }
2947
2948 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2949 {
2950         int i, online_vcpus, started_vcpus = 0;
2951
2952         if (!is_vcpu_stopped(vcpu))
2953                 return;
2954
2955         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2956         /* Only one cpu at a time may enter/leave the STOPPED state. */
2957         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2958         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2959
2960         for (i = 0; i < online_vcpus; i++) {
2961                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2962                         started_vcpus++;
2963         }
2964
2965         if (started_vcpus == 0) {
2966                 /* we're the only active VCPU -> speed it up */
2967                 __enable_ibs_on_vcpu(vcpu);
2968         } else if (started_vcpus == 1) {
2969                 /*
2970                  * As we are starting a second VCPU, we have to disable
2971                  * the IBS facility on all VCPUs to remove potentially
2972                  * oustanding ENABLE requests.
2973                  */
2974                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2975         }
2976
2977         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2978         /*
2979          * Another VCPU might have used IBS while we were offline.
2980          * Let's play safe and flush the VCPU at startup.
2981          */
2982         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2983         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2984         return;
2985 }
2986
2987 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2988 {
2989         int i, online_vcpus, started_vcpus = 0;
2990         struct kvm_vcpu *started_vcpu = NULL;
2991
2992         if (is_vcpu_stopped(vcpu))
2993                 return;
2994
2995         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2996         /* Only one cpu at a time may enter/leave the STOPPED state. */
2997         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2998         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2999
3000         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3001         kvm_s390_clear_stop_irq(vcpu);
3002
3003         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3004         __disable_ibs_on_vcpu(vcpu);
3005
3006         for (i = 0; i < online_vcpus; i++) {
3007                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3008                         started_vcpus++;
3009                         started_vcpu = vcpu->kvm->vcpus[i];
3010                 }
3011         }
3012
3013         if (started_vcpus == 1) {
3014                 /*
3015                  * As we only have one VCPU left, we want to enable the
3016                  * IBS facility for that VCPU to speed it up.
3017                  */
3018                 __enable_ibs_on_vcpu(started_vcpu);
3019         }
3020
3021         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3022         return;
3023 }
3024
3025 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3026                                      struct kvm_enable_cap *cap)
3027 {
3028         int r;
3029
3030         if (cap->flags)
3031                 return -EINVAL;
3032
3033         switch (cap->cap) {
3034         case KVM_CAP_S390_CSS_SUPPORT:
3035                 if (!vcpu->kvm->arch.css_support) {
3036                         vcpu->kvm->arch.css_support = 1;
3037                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3038                         trace_kvm_s390_enable_css(vcpu->kvm);
3039                 }
3040                 r = 0;
3041                 break;
3042         default:
3043                 r = -EINVAL;
3044                 break;
3045         }
3046         return r;
3047 }
3048
3049 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3050                                   struct kvm_s390_mem_op *mop)
3051 {
3052         void __user *uaddr = (void __user *)mop->buf;
3053         void *tmpbuf = NULL;
3054         int r, srcu_idx;
3055         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3056                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3057
3058         if (mop->flags & ~supported_flags)
3059                 return -EINVAL;
3060
3061         if (mop->size > MEM_OP_MAX_SIZE)
3062                 return -E2BIG;
3063
3064         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3065                 tmpbuf = vmalloc(mop->size);
3066                 if (!tmpbuf)
3067                         return -ENOMEM;
3068         }
3069
3070         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3071
3072         switch (mop->op) {
3073         case KVM_S390_MEMOP_LOGICAL_READ:
3074                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3075                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3076                                             mop->size, GACC_FETCH);
3077                         break;
3078                 }
3079                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3080                 if (r == 0) {
3081                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3082                                 r = -EFAULT;
3083                 }
3084                 break;
3085         case KVM_S390_MEMOP_LOGICAL_WRITE:
3086                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3087                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3088                                             mop->size, GACC_STORE);
3089                         break;
3090                 }
3091                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3092                         r = -EFAULT;
3093                         break;
3094                 }
3095                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3096                 break;
3097         default:
3098                 r = -EINVAL;
3099         }
3100
3101         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3102
3103         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3104                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3105
3106         vfree(tmpbuf);
3107         return r;
3108 }
3109
3110 long kvm_arch_vcpu_ioctl(struct file *filp,
3111                          unsigned int ioctl, unsigned long arg)
3112 {
3113         struct kvm_vcpu *vcpu = filp->private_data;
3114         void __user *argp = (void __user *)arg;
3115         int idx;
3116         long r;
3117
3118         switch (ioctl) {
3119         case KVM_S390_IRQ: {
3120                 struct kvm_s390_irq s390irq;
3121
3122                 r = -EFAULT;
3123                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3124                         break;
3125                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3126                 break;
3127         }
3128         case KVM_S390_INTERRUPT: {
3129                 struct kvm_s390_interrupt s390int;
3130                 struct kvm_s390_irq s390irq;
3131
3132                 r = -EFAULT;
3133                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3134                         break;
3135                 if (s390int_to_s390irq(&s390int, &s390irq))
3136                         return -EINVAL;
3137                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3138                 break;
3139         }
3140         case KVM_S390_STORE_STATUS:
3141                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3142                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3143                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3144                 break;
3145         case KVM_S390_SET_INITIAL_PSW: {
3146                 psw_t psw;
3147
3148                 r = -EFAULT;
3149                 if (copy_from_user(&psw, argp, sizeof(psw)))
3150                         break;
3151                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3152                 break;
3153         }
3154         case KVM_S390_INITIAL_RESET:
3155                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3156                 break;
3157         case KVM_SET_ONE_REG:
3158         case KVM_GET_ONE_REG: {
3159                 struct kvm_one_reg reg;
3160                 r = -EFAULT;
3161                 if (copy_from_user(&reg, argp, sizeof(reg)))
3162                         break;
3163                 if (ioctl == KVM_SET_ONE_REG)
3164                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3165                 else
3166                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3167                 break;
3168         }
3169 #ifdef CONFIG_KVM_S390_UCONTROL
3170         case KVM_S390_UCAS_MAP: {
3171                 struct kvm_s390_ucas_mapping ucasmap;
3172
3173                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3174                         r = -EFAULT;
3175                         break;
3176                 }
3177
3178                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3179                         r = -EINVAL;
3180                         break;
3181                 }
3182
3183                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3184                                      ucasmap.vcpu_addr, ucasmap.length);
3185                 break;
3186         }
3187         case KVM_S390_UCAS_UNMAP: {
3188                 struct kvm_s390_ucas_mapping ucasmap;
3189
3190                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3191                         r = -EFAULT;
3192                         break;
3193                 }
3194
3195                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3196                         r = -EINVAL;
3197                         break;
3198                 }
3199
3200                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3201                         ucasmap.length);
3202                 break;
3203         }
3204 #endif
3205         case KVM_S390_VCPU_FAULT: {
3206                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3207                 break;
3208         }
3209         case KVM_ENABLE_CAP:
3210         {
3211                 struct kvm_enable_cap cap;
3212                 r = -EFAULT;
3213                 if (copy_from_user(&cap, argp, sizeof(cap)))
3214                         break;
3215                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3216                 break;
3217         }
3218         case KVM_S390_MEM_OP: {
3219                 struct kvm_s390_mem_op mem_op;
3220
3221                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3222                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3223                 else
3224                         r = -EFAULT;
3225                 break;
3226         }
3227         case KVM_S390_SET_IRQ_STATE: {
3228                 struct kvm_s390_irq_state irq_state;
3229
3230                 r = -EFAULT;
3231                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3232                         break;
3233                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3234                     irq_state.len == 0 ||
3235                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3236                         r = -EINVAL;
3237                         break;
3238                 }
3239                 r = kvm_s390_set_irq_state(vcpu,
3240                                            (void __user *) irq_state.buf,
3241                                            irq_state.len);
3242                 break;
3243         }
3244         case KVM_S390_GET_IRQ_STATE: {
3245                 struct kvm_s390_irq_state irq_state;
3246
3247                 r = -EFAULT;
3248                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3249                         break;
3250                 if (irq_state.len == 0) {
3251                         r = -EINVAL;
3252                         break;
3253                 }
3254                 r = kvm_s390_get_irq_state(vcpu,
3255                                            (__u8 __user *)  irq_state.buf,
3256                                            irq_state.len);
3257                 break;
3258         }
3259         default:
3260                 r = -ENOTTY;
3261         }
3262         return r;
3263 }
3264
3265 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3266 {
3267 #ifdef CONFIG_KVM_S390_UCONTROL
3268         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3269                  && (kvm_is_ucontrol(vcpu->kvm))) {
3270                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3271                 get_page(vmf->page);
3272                 return 0;
3273         }
3274 #endif
3275         return VM_FAULT_SIGBUS;
3276 }
3277
3278 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3279                             unsigned long npages)
3280 {
3281         return 0;
3282 }
3283
3284 /* Section: memory related */
3285 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3286                                    struct kvm_memory_slot *memslot,
3287                                    const struct kvm_userspace_memory_region *mem,
3288                                    enum kvm_mr_change change)
3289 {
3290         /* A few sanity checks. We can have memory slots which have to be
3291            located/ended at a segment boundary (1MB). The memory in userland is
3292            ok to be fragmented into various different vmas. It is okay to mmap()
3293            and munmap() stuff in this slot after doing this call at any time */
3294
3295         if (mem->userspace_addr & 0xffffful)
3296                 return -EINVAL;
3297
3298         if (mem->memory_size & 0xffffful)
3299                 return -EINVAL;
3300
3301         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3302                 return -EINVAL;
3303
3304         return 0;
3305 }
3306
3307 void kvm_arch_commit_memory_region(struct kvm *kvm,
3308                                 const struct kvm_userspace_memory_region *mem,
3309                                 const struct kvm_memory_slot *old,
3310                                 const struct kvm_memory_slot *new,
3311                                 enum kvm_mr_change change)
3312 {
3313         int rc;
3314
3315         /* If the basics of the memslot do not change, we do not want
3316          * to update the gmap. Every update causes several unnecessary
3317          * segment translation exceptions. This is usually handled just
3318          * fine by the normal fault handler + gmap, but it will also
3319          * cause faults on the prefix page of running guest CPUs.
3320          */
3321         if (old->userspace_addr == mem->userspace_addr &&
3322             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3323             old->npages * PAGE_SIZE == mem->memory_size)
3324                 return;
3325
3326         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3327                 mem->guest_phys_addr, mem->memory_size);
3328         if (rc)
3329                 pr_warn("failed to commit memory region\n");
3330         return;
3331 }
3332
3333 static inline unsigned long nonhyp_mask(int i)
3334 {
3335         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3336
3337         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3338 }
3339
3340 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3341 {
3342         vcpu->valid_wakeup = false;
3343 }
3344
3345 static int __init kvm_s390_init(void)
3346 {
3347         int i;
3348
3349         if (!sclp.has_sief2) {
3350                 pr_info("SIE not available\n");
3351                 return -ENODEV;
3352         }
3353
3354         for (i = 0; i < 16; i++)
3355                 kvm_s390_fac_list_mask[i] |=
3356                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3357
3358         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3359 }
3360
3361 static void __exit kvm_s390_exit(void)
3362 {
3363         kvm_exit();
3364 }
3365
3366 module_init(kvm_s390_init);
3367 module_exit(kvm_s390_exit);
3368
3369 /*
3370  * Enable autoloading of the kvm module.
3371  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3372  * since x86 takes a different approach.
3373  */
3374 #include <linux/miscdevice.h>
3375 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3376 MODULE_ALIAS("devname:kvm");