[karo-tx-linux.git] / arch/s390/kvm/kvm-s390.c
KVM: s390: Introduce switching code
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/etr.h>
32 #include <asm/pgtable.h>
33 #include <asm/nmi.h>
34 #include <asm/switch_to.h>
35 #include <asm/isc.h>
36 #include <asm/sclp.h>
37 #include "kvm-s390.h"
38 #include "gaccess.h"
39
40 #define KMSG_COMPONENT "kvm-s390"
41 #undef pr_fmt
42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
43
44 #define CREATE_TRACE_POINTS
45 #include "trace.h"
46 #include "trace-s390.h"
47
48 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
49 #define LOCAL_IRQS 32
50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
51                            (KVM_MAX_VCPUS + LOCAL_IRQS))
52
53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
54
55 struct kvm_stats_debugfs_item debugfs_entries[] = {
56         { "userspace_handled", VCPU_STAT(exit_userspace) },
57         { "exit_null", VCPU_STAT(exit_null) },
58         { "exit_validity", VCPU_STAT(exit_validity) },
59         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
60         { "exit_external_request", VCPU_STAT(exit_external_request) },
61         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
62         { "exit_instruction", VCPU_STAT(exit_instruction) },
63         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
64         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
65         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
68         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
69         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
70         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
71         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
72         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
73         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
74         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
75         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
76         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
77         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
78         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
79         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
80         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
81         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
82         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
83         { "instruction_spx", VCPU_STAT(instruction_spx) },
84         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
85         { "instruction_stap", VCPU_STAT(instruction_stap) },
86         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
87         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
88         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
89         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
90         { "instruction_essa", VCPU_STAT(instruction_essa) },
91         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
92         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
93         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
94         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
95         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
96         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
97         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
98         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
99         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
100         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
101         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
102         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
103         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
104         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
105         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
106         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
107         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
108         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
109         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
110         { "diagnose_10", VCPU_STAT(diagnose_10) },
111         { "diagnose_44", VCPU_STAT(diagnose_44) },
112         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
113         { "diagnose_258", VCPU_STAT(diagnose_258) },
114         { "diagnose_308", VCPU_STAT(diagnose_308) },
115         { "diagnose_500", VCPU_STAT(diagnose_500) },
116         { NULL }
117 };
118
119 /* upper facilities limit for kvm */
120 unsigned long kvm_s390_fac_list_mask[] = {
121         0xffe6fffbfcfdfc40UL,
122         0x005e800000000000UL,
123 };
124
125 unsigned long kvm_s390_fac_list_mask_size(void)
126 {
127         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
128         return ARRAY_SIZE(kvm_s390_fac_list_mask);
129 }
130
131 static struct gmap_notifier gmap_notifier;
132 debug_info_t *kvm_s390_dbf;
133
134 /* Section: not file related */
135 int kvm_arch_hardware_enable(void)
136 {
137         /* every s390 is virtualization enabled ;-) */
138         return 0;
139 }
140
141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
142
143 /*
144  * This callback is executed during stop_machine(). All CPUs are therefore
145  * temporarily stopped. In order not to change guest behavior, we have to
146  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
147  * so a CPU won't be stopped while calculating with the epoch.
148  */
149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
150                           void *v)
151 {
152         struct kvm *kvm;
153         struct kvm_vcpu *vcpu;
154         int i;
155         unsigned long long *delta = v;
156
157         list_for_each_entry(kvm, &vm_list, vm_list) {
158                 kvm->arch.epoch -= *delta;
159                 kvm_for_each_vcpu(i, vcpu, kvm) {
160                         vcpu->arch.sie_block->epoch -= *delta;
161                 }
162         }
163         return NOTIFY_OK;
164 }
165
166 static struct notifier_block kvm_clock_notifier = {
167         .notifier_call = kvm_clock_sync,
168 };
169
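/*
 * Register the gmap IPTE invalidation notifier and the TOD epoch delta
 * notifier; both are unregistered again in kvm_arch_hardware_unsetup().
 */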
170 int kvm_arch_hardware_setup(void)
171 {
172         gmap_notifier.notifier_call = kvm_gmap_notifier;
173         gmap_register_ipte_notifier(&gmap_notifier);
174         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
175                                        &kvm_clock_notifier);
176         return 0;
177 }
178
179 void kvm_arch_hardware_unsetup(void)
180 {
181         gmap_unregister_ipte_notifier(&gmap_notifier);
182         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
183                                          &kvm_clock_notifier);
184 }
185
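/*
 * Module initialization: allocate the "kvm-trace" s390 debug feature area
 * and register the floating interrupt controller (FLIC) device ops.
 */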
186 int kvm_arch_init(void *opaque)
187 {
188         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
189         if (!kvm_s390_dbf)
190                 return -ENOMEM;
191
192         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
193                 debug_unregister(kvm_s390_dbf);
194                 return -ENOMEM;
195         }
196
197         /* Register floating interrupt controller interface. */
198         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
199 }
200
201 void kvm_arch_exit(void)
202 {
203         debug_unregister(kvm_s390_dbf);
204 }
205
206 /* Section: device related */
207 long kvm_arch_dev_ioctl(struct file *filp,
208                         unsigned int ioctl, unsigned long arg)
209 {
210         if (ioctl == KVM_S390_ENABLE_SIE)
211                 return s390_enable_sie();
212         return -EINVAL;
213 }
214
215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
216 {
217         int r;
218
219         switch (ext) {
220         case KVM_CAP_S390_PSW:
221         case KVM_CAP_S390_GMAP:
222         case KVM_CAP_SYNC_MMU:
223 #ifdef CONFIG_KVM_S390_UCONTROL
224         case KVM_CAP_S390_UCONTROL:
225 #endif
226         case KVM_CAP_ASYNC_PF:
227         case KVM_CAP_SYNC_REGS:
228         case KVM_CAP_ONE_REG:
229         case KVM_CAP_ENABLE_CAP:
230         case KVM_CAP_S390_CSS_SUPPORT:
231         case KVM_CAP_IOEVENTFD:
232         case KVM_CAP_DEVICE_CTRL:
233         case KVM_CAP_ENABLE_CAP_VM:
234         case KVM_CAP_S390_IRQCHIP:
235         case KVM_CAP_VM_ATTRIBUTES:
236         case KVM_CAP_MP_STATE:
237         case KVM_CAP_S390_INJECT_IRQ:
238         case KVM_CAP_S390_USER_SIGP:
239         case KVM_CAP_S390_USER_STSI:
240         case KVM_CAP_S390_SKEYS:
241         case KVM_CAP_S390_IRQ_STATE:
242                 r = 1;
243                 break;
244         case KVM_CAP_S390_MEM_OP:
245                 r = MEM_OP_MAX_SIZE;
246                 break;
247         case KVM_CAP_NR_VCPUS:
248         case KVM_CAP_MAX_VCPUS:
249                 r = KVM_MAX_VCPUS;
250                 break;
251         case KVM_CAP_NR_MEMSLOTS:
252                 r = KVM_USER_MEM_SLOTS;
253                 break;
254         case KVM_CAP_S390_COW:
255                 r = MACHINE_HAS_ESOP;
256                 break;
257         case KVM_CAP_S390_VECTOR_REGISTERS:
258                 r = MACHINE_HAS_VX;
259                 break;
260         default:
261                 r = 0;
262         }
263         return r;
264 }
265
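/*
 * Transfer the dirty state of every page in a memory slot from the gmap
 * (guest address space) into the KVM dirty bitmap.
 */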
266 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
267                                         struct kvm_memory_slot *memslot)
268 {
269         gfn_t cur_gfn, last_gfn;
270         unsigned long address;
271         struct gmap *gmap = kvm->arch.gmap;
272
273         down_read(&gmap->mm->mmap_sem);
274         /* Loop over all guest pages */
275         last_gfn = memslot->base_gfn + memslot->npages;
276         for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
277                 address = gfn_to_hva_memslot(memslot, cur_gfn);
278
279                 if (gmap_test_and_clear_dirty(address, gmap))
280                         mark_page_dirty(kvm, cur_gfn);
281         }
282         up_read(&gmap->mm->mmap_sem);
283 }
284
285 /* Section: vm related */
286 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
287
288 /*
289  * Get (and clear) the dirty memory log for a memory slot.
290  */
291 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
292                                struct kvm_dirty_log *log)
293 {
294         int r;
295         unsigned long n;
296         struct kvm_memslots *slots;
297         struct kvm_memory_slot *memslot;
298         int is_dirty = 0;
299
300         mutex_lock(&kvm->slots_lock);
301
302         r = -EINVAL;
303         if (log->slot >= KVM_USER_MEM_SLOTS)
304                 goto out;
305
306         slots = kvm_memslots(kvm);
307         memslot = id_to_memslot(slots, log->slot);
308         r = -ENOENT;
309         if (!memslot->dirty_bitmap)
310                 goto out;
311
312         kvm_s390_sync_dirty_log(kvm, memslot);
313         r = kvm_get_dirty_log(kvm, log, &is_dirty);
314         if (r)
315                 goto out;
316
317         /* Clear the dirty log */
318         if (is_dirty) {
319                 n = kvm_dirty_bitmap_bytes(memslot);
320                 memset(memslot->dirty_bitmap, 0, n);
321         }
322         r = 0;
323 out:
324         mutex_unlock(&kvm->slots_lock);
325         return r;
326 }
327
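/* Enable VM-wide capabilities (irqchip, user SIGP/STSI, vector registers). */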
328 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
329 {
330         int r;
331
332         if (cap->flags)
333                 return -EINVAL;
334
335         switch (cap->cap) {
336         case KVM_CAP_S390_IRQCHIP:
337                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
338                 kvm->arch.use_irqchip = 1;
339                 r = 0;
340                 break;
341         case KVM_CAP_S390_USER_SIGP:
342                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
343                 kvm->arch.user_sigp = 1;
344                 r = 0;
345                 break;
346         case KVM_CAP_S390_VECTOR_REGISTERS:
347                 mutex_lock(&kvm->lock);
348                 if (atomic_read(&kvm->online_vcpus)) {
349                         r = -EBUSY;
350                 } else if (MACHINE_HAS_VX) {
351                         set_kvm_facility(kvm->arch.model.fac->mask, 129);
352                         set_kvm_facility(kvm->arch.model.fac->list, 129);
353                         r = 0;
354                 } else
355                         r = -EINVAL;
356                 mutex_unlock(&kvm->lock);
357                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
358                          r ? "(not available)" : "(success)");
359                 break;
360         case KVM_CAP_S390_USER_STSI:
361                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
362                 kvm->arch.user_stsi = 1;
363                 r = 0;
364                 break;
365         default:
366                 r = -EINVAL;
367                 break;
368         }
369         return r;
370 }
371
372 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
373 {
374         int ret;
375
376         switch (attr->attr) {
377         case KVM_S390_VM_MEM_LIMIT_SIZE:
378                 ret = 0;
379                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
380                          kvm->arch.gmap->asce_end);
381                 if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
382                         ret = -EFAULT;
383                 break;
384         default:
385                 ret = -ENXIO;
386                 break;
387         }
388         return ret;
389 }
390
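/*
 * Memory control attributes: enable CMMA, reset all CMMA states, or change
 * the guest memory limit by replacing the gmap before any VCPU exists.
 */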
391 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
392 {
393         int ret;
394         unsigned int idx;
395         switch (attr->attr) {
396         case KVM_S390_VM_MEM_ENABLE_CMMA:
397                 /* enable CMMA only for z10 and later (EDAT_1) */
398                 ret = -EINVAL;
399                 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
400                         break;
401
402                 ret = -EBUSY;
403                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
404                 mutex_lock(&kvm->lock);
405                 if (atomic_read(&kvm->online_vcpus) == 0) {
406                         kvm->arch.use_cmma = 1;
407                         ret = 0;
408                 }
409                 mutex_unlock(&kvm->lock);
410                 break;
411         case KVM_S390_VM_MEM_CLR_CMMA:
412                 ret = -EINVAL;
413                 if (!kvm->arch.use_cmma)
414                         break;
415
416                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
417                 mutex_lock(&kvm->lock);
418                 idx = srcu_read_lock(&kvm->srcu);
419                 s390_reset_cmma(kvm->arch.gmap->mm);
420                 srcu_read_unlock(&kvm->srcu, idx);
421                 mutex_unlock(&kvm->lock);
422                 ret = 0;
423                 break;
424         case KVM_S390_VM_MEM_LIMIT_SIZE: {
425                 unsigned long new_limit;
426
427                 if (kvm_is_ucontrol(kvm))
428                         return -EINVAL;
429
430                 if (get_user(new_limit, (u64 __user *)attr->addr))
431                         return -EFAULT;
432
433                 if (new_limit > kvm->arch.gmap->asce_end)
434                         return -E2BIG;
435
436                 ret = -EBUSY;
437                 mutex_lock(&kvm->lock);
438                 if (atomic_read(&kvm->online_vcpus) == 0) {
439                         /* gmap_alloc will round the limit up */
440                         struct gmap *new = gmap_alloc(current->mm, new_limit);
441
442                         if (!new) {
443                                 ret = -ENOMEM;
444                         } else {
445                                 gmap_free(kvm->arch.gmap);
446                                 new->private = kvm;
447                                 kvm->arch.gmap = new;
448                                 ret = 0;
449                         }
450                 }
451                 mutex_unlock(&kvm->lock);
452                 VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
453                 break;
454         }
455         default:
456                 ret = -ENXIO;
457                 break;
458         }
459         return ret;
460 }
461
462 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
463
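/*
 * Enable or disable AES/DEA key wrapping.  Enabling generates new wrapping
 * key masks; afterwards every VCPU's crypto setup is refreshed and the VCPU
 * is kicked out of SIE so the change takes effect immediately.
 */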
464 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
465 {
466         struct kvm_vcpu *vcpu;
467         int i;
468
469         if (!test_kvm_facility(kvm, 76))
470                 return -EINVAL;
471
472         mutex_lock(&kvm->lock);
473         switch (attr->attr) {
474         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
475                 get_random_bytes(
476                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
477                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
478                 kvm->arch.crypto.aes_kw = 1;
479                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
480                 break;
481         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
482                 get_random_bytes(
483                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
484                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
485                 kvm->arch.crypto.dea_kw = 1;
486                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
487                 break;
488         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
489                 kvm->arch.crypto.aes_kw = 0;
490                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
491                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
492                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
493                 break;
494         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
495                 kvm->arch.crypto.dea_kw = 0;
496                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
497                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
498                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
499                 break;
500         default:
501                 mutex_unlock(&kvm->lock);
502                 return -ENXIO;
503         }
504
505         kvm_for_each_vcpu(i, vcpu, kvm) {
506                 kvm_s390_vcpu_crypto_setup(vcpu);
507                 exit_sie(vcpu);
508         }
509         mutex_unlock(&kvm->lock);
510         return 0;
511 }
512
513 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
514 {
515         u8 gtod_high;
516
517         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
518                                            sizeof(gtod_high)))
519                 return -EFAULT;
520
521         if (gtod_high != 0)
522                 return -EINVAL;
523         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
524
525         return 0;
526 }
527
528 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
529 {
530         u64 gtod;
531
532         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
533                 return -EFAULT;
534
535         kvm_s390_set_tod_clock(kvm, gtod);
536         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
537         return 0;
538 }
539
540 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
541 {
542         int ret;
543
544         if (attr->flags)
545                 return -EINVAL;
546
547         switch (attr->attr) {
548         case KVM_S390_VM_TOD_HIGH:
549                 ret = kvm_s390_set_tod_high(kvm, attr);
550                 break;
551         case KVM_S390_VM_TOD_LOW:
552                 ret = kvm_s390_set_tod_low(kvm, attr);
553                 break;
554         default:
555                 ret = -ENXIO;
556                 break;
557         }
558         return ret;
559 }
560
561 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
562 {
563         u8 gtod_high = 0;
564
565         if (copy_to_user((void __user *)attr->addr, &gtod_high,
566                                          sizeof(gtod_high)))
567                 return -EFAULT;
568         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
569
570         return 0;
571 }
572
573 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
574 {
575         u64 gtod;
576
577         gtod = kvm_s390_get_tod_clock_fast(kvm);
578         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
579                 return -EFAULT;
580         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
581
582         return 0;
583 }
584
585 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
586 {
587         int ret;
588
589         if (attr->flags)
590                 return -EINVAL;
591
592         switch (attr->attr) {
593         case KVM_S390_VM_TOD_HIGH:
594                 ret = kvm_s390_get_tod_high(kvm, attr);
595                 break;
596         case KVM_S390_VM_TOD_LOW:
597                 ret = kvm_s390_get_tod_low(kvm, attr);
598                 break;
599         default:
600                 ret = -ENXIO;
601                 break;
602         }
603         return ret;
604 }
605
606 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
607 {
608         struct kvm_s390_vm_cpu_processor *proc;
609         int ret = 0;
610
611         mutex_lock(&kvm->lock);
612         if (atomic_read(&kvm->online_vcpus)) {
613                 ret = -EBUSY;
614                 goto out;
615         }
616         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
617         if (!proc) {
618                 ret = -ENOMEM;
619                 goto out;
620         }
621         if (!copy_from_user(proc, (void __user *)attr->addr,
622                             sizeof(*proc))) {
623                 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
624                        sizeof(struct cpuid));
625                 kvm->arch.model.ibc = proc->ibc;
626                 memcpy(kvm->arch.model.fac->list, proc->fac_list,
627                        S390_ARCH_FAC_LIST_SIZE_BYTE);
628         } else
629                 ret = -EFAULT;
630         kfree(proc);
631 out:
632         mutex_unlock(&kvm->lock);
633         return ret;
634 }
635
636 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
637 {
638         int ret = -ENXIO;
639
640         switch (attr->attr) {
641         case KVM_S390_VM_CPU_PROCESSOR:
642                 ret = kvm_s390_set_processor(kvm, attr);
643                 break;
644         }
645         return ret;
646 }
647
648 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
649 {
650         struct kvm_s390_vm_cpu_processor *proc;
651         int ret = 0;
652
653         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
654         if (!proc) {
655                 ret = -ENOMEM;
656                 goto out;
657         }
658         memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
659         proc->ibc = kvm->arch.model.ibc;
660         memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
661         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
662                 ret = -EFAULT;
663         kfree(proc);
664 out:
665         return ret;
666 }
667
668 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
669 {
670         struct kvm_s390_vm_cpu_machine *mach;
671         int ret = 0;
672
673         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
674         if (!mach) {
675                 ret = -ENOMEM;
676                 goto out;
677         }
678         get_cpu_id((struct cpuid *) &mach->cpuid);
679         mach->ibc = sclp.ibc;
680         memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
681                S390_ARCH_FAC_LIST_SIZE_BYTE);
682         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
683                S390_ARCH_FAC_LIST_SIZE_BYTE);
684         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
685                 ret = -EFAULT;
686         kfree(mach);
687 out:
688         return ret;
689 }
690
691 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
692 {
693         int ret = -ENXIO;
694
695         switch (attr->attr) {
696         case KVM_S390_VM_CPU_PROCESSOR:
697                 ret = kvm_s390_get_processor(kvm, attr);
698                 break;
699         case KVM_S390_VM_CPU_MACHINE:
700                 ret = kvm_s390_get_machine(kvm, attr);
701                 break;
702         }
703         return ret;
704 }
705
706 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
707 {
708         int ret;
709
710         switch (attr->group) {
711         case KVM_S390_VM_MEM_CTRL:
712                 ret = kvm_s390_set_mem_control(kvm, attr);
713                 break;
714         case KVM_S390_VM_TOD:
715                 ret = kvm_s390_set_tod(kvm, attr);
716                 break;
717         case KVM_S390_VM_CPU_MODEL:
718                 ret = kvm_s390_set_cpu_model(kvm, attr);
719                 break;
720         case KVM_S390_VM_CRYPTO:
721                 ret = kvm_s390_vm_set_crypto(kvm, attr);
722                 break;
723         default:
724                 ret = -ENXIO;
725                 break;
726         }
727
728         return ret;
729 }
730
731 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
732 {
733         int ret;
734
735         switch (attr->group) {
736         case KVM_S390_VM_MEM_CTRL:
737                 ret = kvm_s390_get_mem_control(kvm, attr);
738                 break;
739         case KVM_S390_VM_TOD:
740                 ret = kvm_s390_get_tod(kvm, attr);
741                 break;
742         case KVM_S390_VM_CPU_MODEL:
743                 ret = kvm_s390_get_cpu_model(kvm, attr);
744                 break;
745         default:
746                 ret = -ENXIO;
747                 break;
748         }
749
750         return ret;
751 }
752
753 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
754 {
755         int ret;
756
757         switch (attr->group) {
758         case KVM_S390_VM_MEM_CTRL:
759                 switch (attr->attr) {
760                 case KVM_S390_VM_MEM_ENABLE_CMMA:
761                 case KVM_S390_VM_MEM_CLR_CMMA:
762                 case KVM_S390_VM_MEM_LIMIT_SIZE:
763                         ret = 0;
764                         break;
765                 default:
766                         ret = -ENXIO;
767                         break;
768                 }
769                 break;
770         case KVM_S390_VM_TOD:
771                 switch (attr->attr) {
772                 case KVM_S390_VM_TOD_LOW:
773                 case KVM_S390_VM_TOD_HIGH:
774                         ret = 0;
775                         break;
776                 default:
777                         ret = -ENXIO;
778                         break;
779                 }
780                 break;
781         case KVM_S390_VM_CPU_MODEL:
782                 switch (attr->attr) {
783                 case KVM_S390_VM_CPU_PROCESSOR:
784                 case KVM_S390_VM_CPU_MACHINE:
785                         ret = 0;
786                         break;
787                 default:
788                         ret = -ENXIO;
789                         break;
790                 }
791                 break;
792         case KVM_S390_VM_CRYPTO:
793                 switch (attr->attr) {
794                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
795                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
796                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
797                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
798                         ret = 0;
799                         break;
800                 default:
801                         ret = -ENXIO;
802                         break;
803                 }
804                 break;
805         default:
806                 ret = -ENXIO;
807                 break;
808         }
809
810         return ret;
811 }
812
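/*
 * Copy guest storage keys into a user buffer.  Returns
 * KVM_S390_GET_SKEYS_NONE if the guest does not use storage keys.
 */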
813 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
814 {
815         uint8_t *keys;
816         uint64_t hva;
817         unsigned long curkey;
818         int i, r = 0;
819
820         if (args->flags != 0)
821                 return -EINVAL;
822
823         /* Is this guest using storage keys? */
824         if (!mm_use_skey(current->mm))
825                 return KVM_S390_GET_SKEYS_NONE;
826
827         /* Enforce sane limit on memory allocation */
828         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
829                 return -EINVAL;
830
831         keys = kmalloc_array(args->count, sizeof(uint8_t),
832                              GFP_KERNEL | __GFP_NOWARN);
833         if (!keys)
834                 keys = vmalloc(sizeof(uint8_t) * args->count);
835         if (!keys)
836                 return -ENOMEM;
837
838         for (i = 0; i < args->count; i++) {
839                 hva = gfn_to_hva(kvm, args->start_gfn + i);
840                 if (kvm_is_error_hva(hva)) {
841                         r = -EFAULT;
842                         goto out;
843                 }
844
845                 curkey = get_guest_storage_key(current->mm, hva);
846                 if (IS_ERR_VALUE(curkey)) {
847                         r = curkey;
848                         goto out;
849                 }
850                 keys[i] = curkey;
851         }
852
853         r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
854                          sizeof(uint8_t) * args->count);
855         if (r)
856                 r = -EFAULT;
857 out:
858         kvfree(keys);
859         return r;
860 }
861
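/*
 * Install user-supplied storage keys for the guest; storage key handling
 * is enabled via s390_enable_skey() before the keys are set.
 */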
862 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
863 {
864         uint8_t *keys;
865         uint64_t hva;
866         int i, r = 0;
867
868         if (args->flags != 0)
869                 return -EINVAL;
870
871         /* Enforce sane limit on memory allocation */
872         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
873                 return -EINVAL;
874
875         keys = kmalloc_array(args->count, sizeof(uint8_t),
876                              GFP_KERNEL | __GFP_NOWARN);
877         if (!keys)
878                 keys = vmalloc(sizeof(uint8_t) * args->count);
879         if (!keys)
880                 return -ENOMEM;
881
882         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
883                            sizeof(uint8_t) * args->count);
884         if (r) {
885                 r = -EFAULT;
886                 goto out;
887         }
888
889         /* Enable storage key handling for the guest */
890         r = s390_enable_skey();
891         if (r)
892                 goto out;
893
894         for (i = 0; i < args->count; i++) {
895                 hva = gfn_to_hva(kvm, args->start_gfn + i);
896                 if (kvm_is_error_hva(hva)) {
897                         r = -EFAULT;
898                         goto out;
899                 }
900
901                 /* Lowest order bit is reserved */
902                 if (keys[i] & 0x01) {
903                         r = -EINVAL;
904                         goto out;
905                 }
906
907                 r = set_guest_storage_key(current->mm, hva,
908                                           (unsigned long)keys[i], 0);
909                 if (r)
910                         goto out;
911         }
912 out:
913         kvfree(keys);
914         return r;
915 }
916
917 long kvm_arch_vm_ioctl(struct file *filp,
918                        unsigned int ioctl, unsigned long arg)
919 {
920         struct kvm *kvm = filp->private_data;
921         void __user *argp = (void __user *)arg;
922         struct kvm_device_attr attr;
923         int r;
924
925         switch (ioctl) {
926         case KVM_S390_INTERRUPT: {
927                 struct kvm_s390_interrupt s390int;
928
929                 r = -EFAULT;
930                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
931                         break;
932                 r = kvm_s390_inject_vm(kvm, &s390int);
933                 break;
934         }
935         case KVM_ENABLE_CAP: {
936                 struct kvm_enable_cap cap;
937                 r = -EFAULT;
938                 if (copy_from_user(&cap, argp, sizeof(cap)))
939                         break;
940                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
941                 break;
942         }
943         case KVM_CREATE_IRQCHIP: {
944                 struct kvm_irq_routing_entry routing;
945
946                 r = -EINVAL;
947                 if (kvm->arch.use_irqchip) {
948                         /* Set up dummy routing. */
949                         memset(&routing, 0, sizeof(routing));
950                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
951                 }
952                 break;
953         }
954         case KVM_SET_DEVICE_ATTR: {
955                 r = -EFAULT;
956                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
957                         break;
958                 r = kvm_s390_vm_set_attr(kvm, &attr);
959                 break;
960         }
961         case KVM_GET_DEVICE_ATTR: {
962                 r = -EFAULT;
963                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
964                         break;
965                 r = kvm_s390_vm_get_attr(kvm, &attr);
966                 break;
967         }
968         case KVM_HAS_DEVICE_ATTR: {
969                 r = -EFAULT;
970                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
971                         break;
972                 r = kvm_s390_vm_has_attr(kvm, &attr);
973                 break;
974         }
975         case KVM_S390_GET_SKEYS: {
976                 struct kvm_s390_skeys args;
977
978                 r = -EFAULT;
979                 if (copy_from_user(&args, argp,
980                                    sizeof(struct kvm_s390_skeys)))
981                         break;
982                 r = kvm_s390_get_skeys(kvm, &args);
983                 break;
984         }
985         case KVM_S390_SET_SKEYS: {
986                 struct kvm_s390_skeys args;
987
988                 r = -EFAULT;
989                 if (copy_from_user(&args, argp,
990                                    sizeof(struct kvm_s390_skeys)))
991                         break;
992                 r = kvm_s390_set_skeys(kvm, &args);
993                 break;
994         }
995         default:
996                 r = -ENOTTY;
997         }
998
999         return r;
1000 }
1001
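/*
 * Query the AP (adjunct processor) configuration with PQAP(QCI); the
 * 128-byte result is evaluated by kvm_s390_apxa_installed() below.
 */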
1002 static int kvm_s390_query_ap_config(u8 *config)
1003 {
1004         u32 fcn_code = 0x04000000UL;
1005         u32 cc = 0;
1006
1007         memset(config, 0, 128);
1008         asm volatile(
1009                 "lgr 0,%1\n"
1010                 "lgr 2,%2\n"
1011                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1012                 "0: ipm %0\n"
1013                 "srl %0,28\n"
1014                 "1:\n"
1015                 EX_TABLE(0b, 1b)
1016                 : "+r" (cc)
1017                 : "r" (fcn_code), "r" (config)
1018                 : "cc", "0", "2", "memory"
1019         );
1020
1021         return cc;
1022 }
1023
1024 static int kvm_s390_apxa_installed(void)
1025 {
1026         u8 config[128];
1027         int cc;
1028
1029         if (test_facility(2) && test_facility(12)) {
1030                 cc = kvm_s390_query_ap_config(config);
1031
1032                 if (cc)
1033                         pr_err("PQAP(QCI) failed with cc=%d\n", cc);
1034                 else
1035                         return config[0] & 0x40;
1036         }
1037
1038         return 0;
1039 }
1040
1041 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1042 {
1043         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1044
1045         if (kvm_s390_apxa_installed())
1046                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1047         else
1048                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1049 }
1050
1051 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1052 {
1053         get_cpu_id(cpu_id);
1054         cpu_id->version = 0xff;
1055 }
1056
1057 static int kvm_s390_crypto_init(struct kvm *kvm)
1058 {
1059         if (!test_kvm_facility(kvm, 76))
1060                 return 0;
1061
1062         kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1063                                          GFP_KERNEL | GFP_DMA);
1064         if (!kvm->arch.crypto.crycb)
1065                 return -ENOMEM;
1066
1067         kvm_s390_set_crycb_format(kvm);
1068
1069         /* Enable AES/DEA protected key functions by default */
1070         kvm->arch.crypto.aes_kw = 1;
1071         kvm->arch.crypto.dea_kw = 1;
1072         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1073                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1074         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1075                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1076
1077         return 0;
1078 }
1079
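/* Free the system control area (SCA), basic or extended format. */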
1080 static void sca_dispose(struct kvm *kvm)
1081 {
1082         if (kvm->arch.use_esca)
1083                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1084         else
1085                 free_page((unsigned long)(kvm->arch.sca));
1086         kvm->arch.sca = NULL;
1087 }
1088
1089 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1090 {
1091         int i, rc;
1092         char debug_name[16];
1093         static unsigned long sca_offset;
1094
1095         rc = -EINVAL;
1096 #ifdef CONFIG_KVM_S390_UCONTROL
1097         if (type & ~KVM_VM_S390_UCONTROL)
1098                 goto out_err;
1099         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1100                 goto out_err;
1101 #else
1102         if (type)
1103                 goto out_err;
1104 #endif
1105
1106         rc = s390_enable_sie();
1107         if (rc)
1108                 goto out_err;
1109
1110         rc = -ENOMEM;
1111
1112         kvm->arch.use_esca = 0; /* start with basic SCA */
1113         rwlock_init(&kvm->arch.sca_lock);
1114         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
1115         if (!kvm->arch.sca)
1116                 goto out_err;
1117         spin_lock(&kvm_lock);
1118         sca_offset += 16;
1119         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1120                 sca_offset = 0;
1121         kvm->arch.sca = (struct bsca_block *)
1122                         ((char *) kvm->arch.sca + sca_offset);
1123         spin_unlock(&kvm_lock);
1124
1125         sprintf(debug_name, "kvm-%u", current->pid);
1126
1127         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1128         if (!kvm->arch.dbf)
1129                 goto out_err;
1130
1131         /*
1132          * The architectural maximum number of facility bits is 16 kbit. To store
1133          * this amount, 2 kbyte of memory is required. Thus we need a full
1134          * page to hold the guest facility list (arch.model.fac->list) and the
1135          * facility mask (arch.model.fac->mask). The page address has to fit
1136          * within 31 bits and be word aligned.
1137          */
1138         kvm->arch.model.fac =
1139                 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1140         if (!kvm->arch.model.fac)
1141                 goto out_err;
1142
1143         /* Populate the facility mask initially. */
1144         memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1145                S390_ARCH_FAC_LIST_SIZE_BYTE);
1146         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1147                 if (i < kvm_s390_fac_list_mask_size())
1148                         kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1149                 else
1150                         kvm->arch.model.fac->mask[i] = 0UL;
1151         }
1152
1153         /* Populate the facility list initially. */
1154         memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1155                S390_ARCH_FAC_LIST_SIZE_BYTE);
1156
1157         kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1158         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1159
1160         if (kvm_s390_crypto_init(kvm) < 0)
1161                 goto out_err;
1162
1163         spin_lock_init(&kvm->arch.float_int.lock);
1164         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1165                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1166         init_waitqueue_head(&kvm->arch.ipte_wq);
1167         mutex_init(&kvm->arch.ipte_mutex);
1168
1169         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1170         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1171
1172         if (type & KVM_VM_S390_UCONTROL) {
1173                 kvm->arch.gmap = NULL;
1174         } else {
1175                 kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1176                 if (!kvm->arch.gmap)
1177                         goto out_err;
1178                 kvm->arch.gmap->private = kvm;
1179                 kvm->arch.gmap->pfault_enabled = 0;
1180         }
1181
1182         kvm->arch.css_support = 0;
1183         kvm->arch.use_irqchip = 0;
1184         kvm->arch.epoch = 0;
1185
1186         spin_lock_init(&kvm->arch.start_stop_lock);
1187         KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
1188
1189         return 0;
1190 out_err:
1191         kfree(kvm->arch.crypto.crycb);
1192         free_page((unsigned long)kvm->arch.model.fac);
1193         debug_unregister(kvm->arch.dbf);
1194         sca_dispose(kvm);
1195         KVM_EVENT(3, "creation of vm failed: %d", rc);
1196         return rc;
1197 }
1198
1199 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1200 {
1201         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1202         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1203         kvm_s390_clear_local_irqs(vcpu);
1204         kvm_clear_async_pf_completion_queue(vcpu);
1205         if (!kvm_is_ucontrol(vcpu->kvm))
1206                 sca_del_vcpu(vcpu);
1207         smp_mb();
1208
1209         if (kvm_is_ucontrol(vcpu->kvm))
1210                 gmap_free(vcpu->arch.gmap);
1211
1212         if (vcpu->kvm->arch.use_cmma)
1213                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1214         free_page((unsigned long)(vcpu->arch.sie_block));
1215
1216         kvm_vcpu_uninit(vcpu);
1217         kmem_cache_free(kvm_vcpu_cache, vcpu);
1218 }
1219
1220 static void kvm_free_vcpus(struct kvm *kvm)
1221 {
1222         unsigned int i;
1223         struct kvm_vcpu *vcpu;
1224
1225         kvm_for_each_vcpu(i, vcpu, kvm)
1226                 kvm_arch_vcpu_destroy(vcpu);
1227
1228         mutex_lock(&kvm->lock);
1229         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1230                 kvm->vcpus[i] = NULL;
1231
1232         atomic_set(&kvm->online_vcpus, 0);
1233         mutex_unlock(&kvm->lock);
1234 }
1235
1236 void kvm_arch_destroy_vm(struct kvm *kvm)
1237 {
1238         kvm_free_vcpus(kvm);
1239         free_page((unsigned long)kvm->arch.model.fac);
1240         sca_dispose(kvm);
1241         debug_unregister(kvm->arch.dbf);
1242         kfree(kvm->arch.crypto.crycb);
1243         if (!kvm_is_ucontrol(kvm))
1244                 gmap_free(kvm->arch.gmap);
1245         kvm_s390_destroy_adapters(kvm);
1246         kvm_s390_clear_float_irqs(kvm);
1247         KVM_EVENT(3, "vm 0x%p destroyed", kvm);
1248 }
1249
1250 /* Section: vcpu related */
1251 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1252 {
1253         vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1254         if (!vcpu->arch.gmap)
1255                 return -ENOMEM;
1256         vcpu->arch.gmap->private = vcpu->kvm;
1257
1258         return 0;
1259 }
1260
1261 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1262 {
1263         read_lock(&vcpu->kvm->arch.sca_lock);
1264         if (vcpu->kvm->arch.use_esca) {
1265                 struct esca_block *sca = vcpu->kvm->arch.sca;
1266
1267                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1268                 if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block)
1269                         sca->cpu[vcpu->vcpu_id].sda = 0;
1270         } else {
1271                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1272
1273                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1274                 if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block)
1275                         sca->cpu[vcpu->vcpu_id].sda = 0;
1276         }
1277         read_unlock(&vcpu->kvm->arch.sca_lock);
1278 }
1279
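/*
 * Enter the VCPU's SIE block into its SCA slot, store the SCA origin
 * (scaoh/scaol) in the SIE control block and mark the entry in sca->mcn.
 */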
1280 static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm,
1281                         unsigned int id)
1282 {
1283         read_lock(&kvm->arch.sca_lock);
1284         if (kvm->arch.use_esca) {
1285                 struct esca_block *sca = kvm->arch.sca;
1286
1287                 if (!sca->cpu[id].sda)
1288                         sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
1289                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1290                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1291                 set_bit_inv(id, (unsigned long *) sca->mcn);
1292         } else {
1293                 struct bsca_block *sca = kvm->arch.sca;
1294
1295                 if (!sca->cpu[id].sda)
1296                         sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
1297                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1298                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1299                 set_bit_inv(id, (unsigned long *) &sca->mcn);
1300         }
1301         read_unlock(&kvm->arch.sca_lock);
1302 }
1303
1304 /* Basic SCA to Extended SCA data copy routines */
1305 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1306 {
1307         d->sda = s->sda;
1308         d->sigp_ctrl.c = s->sigp_ctrl.c;
1309         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1310 }
1311
1312 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1313 {
1314         int i;
1315
1316         d->ipte_control = s->ipte_control;
1317         d->mcn[0] = s->mcn;
1318         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1319                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1320 }
1321
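/*
 * Switch the VM from the basic SCA to the extended SCA.  All VCPUs are
 * blocked while their SIE blocks are repointed to the new SCA origin and
 * the ESCA-usage bit in ecb2 is set; the old basic SCA is freed afterwards.
 */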
1322 static int sca_switch_to_extended(struct kvm *kvm)
1323 {
1324         struct bsca_block *old_sca = kvm->arch.sca;
1325         struct esca_block *new_sca;
1326         struct kvm_vcpu *vcpu;
1327         unsigned int vcpu_idx;
1328         u32 scaol, scaoh;
1329
1330         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1331         if (!new_sca)
1332                 return -ENOMEM;
1333
1334         scaoh = (u32)((u64)(new_sca) >> 32);
1335         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1336
1337         kvm_s390_vcpu_block_all(kvm);
1338         write_lock(&kvm->arch.sca_lock);
1339
1340         sca_copy_b_to_e(new_sca, old_sca);
1341
1342         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1343                 vcpu->arch.sie_block->scaoh = scaoh;
1344                 vcpu->arch.sie_block->scaol = scaol;
1345                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1346         }
1347         kvm->arch.sca = new_sca;
1348         kvm->arch.use_esca = 1;
1349
1350         write_unlock(&kvm->arch.sca_lock);
1351         kvm_s390_vcpu_unblock_all(kvm);
1352
1353         free_page((unsigned long)old_sca);
1354
1355         VM_EVENT(kvm, 2, "Switched to ESCA (%p -> %p)", old_sca, kvm->arch.sca);
1356         return 0;
1357 }
1358
1359 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1360 {
1361         int rc;
1362
1363         if (id < KVM_S390_BSCA_CPU_SLOTS)
1364                 return true;
1365         if (!sclp.has_esca)
1366                 return false;
1367
1368         mutex_lock(&kvm->lock);
1369         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1370         mutex_unlock(&kvm->lock);
1371
1372         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1373 }
1374
1375 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1376 {
1377         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1378         kvm_clear_async_pf_completion_queue(vcpu);
1379         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1380                                     KVM_SYNC_GPRS |
1381                                     KVM_SYNC_ACRS |
1382                                     KVM_SYNC_CRS |
1383                                     KVM_SYNC_ARCH0 |
1384                                     KVM_SYNC_PFAULT;
1385         if (test_kvm_facility(vcpu->kvm, 129))
1386                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1387
1388         if (kvm_is_ucontrol(vcpu->kvm))
1389                 return __kvm_ucontrol_vcpu_init(vcpu);
1390
1391         return 0;
1392 }
1393
1394 /*
1395  * Backs up the current FP/VX register save area to a particular
1396  * destination.  Used to switch between different register save
1397  * areas.
1398  */
1399 static inline void save_fpu_to(struct fpu *dst)
1400 {
1401         dst->fpc = current->thread.fpu.fpc;
1402         dst->regs = current->thread.fpu.regs;
1403 }
1404
1405 /*
1406  * Switches the FP/VX register save area from which to lazily
1407  * restore register contents.
1408  */
1409 static inline void load_fpu_from(struct fpu *from)
1410 {
1411         current->thread.fpu.fpc = from->fpc;
1412         current->thread.fpu.regs = from->regs;
1413 }
1414
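/*
 * Scheduled in on a host CPU: save the host FP/VX and access registers,
 * make the guest register save area current, enable the gmap and mark the
 * VCPU as running.
 */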
1415 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1416 {
1417         /* Save host register state */
1418         save_fpu_regs();
1419         save_fpu_to(&vcpu->arch.host_fpregs);
1420
1421         if (test_kvm_facility(vcpu->kvm, 129)) {
1422                 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1423                 /*
1424                  * Use the register save area in the SIE-control block
1425                  * for register restore and save in kvm_arch_vcpu_put()
1426                  */
1427                 current->thread.fpu.vxrs =
1428                         (__vector128 *)&vcpu->run->s.regs.vrs;
1429         } else
1430                 load_fpu_from(&vcpu->arch.guest_fpregs);
1431
1432         if (test_fp_ctl(current->thread.fpu.fpc))
1433                 /* User space provided an invalid FPC, let's clear it */
1434                 current->thread.fpu.fpc = 0;
1435
1436         save_access_regs(vcpu->arch.host_acrs);
1437         restore_access_regs(vcpu->run->s.regs.acrs);
1438         gmap_enable(vcpu->arch.gmap);
1439         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1440 }
1441
1442 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1443 {
1444         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1445         gmap_disable(vcpu->arch.gmap);
1446
1447         save_fpu_regs();
1448
1449         if (test_kvm_facility(vcpu->kvm, 129))
1450                 /*
1451                  * kvm_arch_vcpu_load() set up the register save area to
1452                  * the &vcpu->run->s.regs.vrs and, thus, the vector registers
1453                  * are already saved.  Only the floating-point control must be
1454                  * copied.
1455                  */
1456                 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1457         else
1458                 save_fpu_to(&vcpu->arch.guest_fpregs);
1459         load_fpu_from(&vcpu->arch.host_fpregs);
1460
1461         save_access_regs(vcpu->run->s.regs.acrs);
1462         restore_access_regs(vcpu->arch.host_acrs);
1463 }
1464
1465 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1466 {
1467         /* this equals the initial cpu reset in the POP (Principles of Operation), but we don't switch to ESA */
1468         vcpu->arch.sie_block->gpsw.mask = 0UL;
1469         vcpu->arch.sie_block->gpsw.addr = 0UL;
1470         kvm_s390_set_prefix(vcpu, 0);
1471         vcpu->arch.sie_block->cputm     = 0UL;
1472         vcpu->arch.sie_block->ckc       = 0UL;
1473         vcpu->arch.sie_block->todpr     = 0;
1474         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1475         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1476         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1477         vcpu->arch.guest_fpregs.fpc = 0;
1478         asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1479         vcpu->arch.sie_block->gbea = 1;
1480         vcpu->arch.sie_block->pp = 0;
1481         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1482         kvm_clear_async_pf_completion_queue(vcpu);
1483         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1484                 kvm_s390_vcpu_stop(vcpu);
1485         kvm_s390_clear_local_irqs(vcpu);
1486 }
1487
1488 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1489 {
1490         mutex_lock(&vcpu->kvm->lock);
1491         preempt_disable();
1492         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1493         preempt_enable();
1494         mutex_unlock(&vcpu->kvm->lock);
1495         if (!kvm_is_ucontrol(vcpu->kvm))
1496                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1497 }
1498
1499 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1500 {
1501         if (!test_kvm_facility(vcpu->kvm, 76))
1502                 return;
1503
1504         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1505
1506         if (vcpu->kvm->arch.crypto.aes_kw)
1507                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1508         if (vcpu->kvm->arch.crypto.dea_kw)
1509                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1510
1511         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1512 }
1513
1514 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1515 {
1516         free_page(vcpu->arch.sie_block->cbrlo);
1517         vcpu->arch.sie_block->cbrlo = 0;
1518 }
1519
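/*
 * Allocate the CBRLO page (collaborative-memory-management buffer list
 * origin) and set the corresponding ecb2 control bit for this VCPU.
 */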
1520 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1521 {
1522         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1523         if (!vcpu->arch.sie_block->cbrlo)
1524                 return -ENOMEM;
1525
1526         vcpu->arch.sie_block->ecb2 |= 0x80;
1527         vcpu->arch.sie_block->ecb2 &= ~0x08;
1528         return 0;
1529 }
1530
1531 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1532 {
1533         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1534
1535         vcpu->arch.cpu_id = model->cpu_id;
1536         vcpu->arch.sie_block->ibc = model->ibc;
1537         vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1538 }
1539
1540 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1541 {
1542         int rc = 0;
1543
1544         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1545                                                     CPUSTAT_SM |
1546                                                     CPUSTAT_STOPPED);
1547
1548         if (test_kvm_facility(vcpu->kvm, 78))
1549                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1550         else if (test_kvm_facility(vcpu->kvm, 8))
1551                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1552
1553         kvm_s390_vcpu_setup_model(vcpu);
1554
1555         vcpu->arch.sie_block->ecb   = 6;
1556         if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1557                 vcpu->arch.sie_block->ecb |= 0x10;
1558
1559         vcpu->arch.sie_block->ecb2  = 8;
1560         if (vcpu->kvm->arch.use_esca)
1561                 vcpu->arch.sie_block->ecb2 |= 4;
1562         vcpu->arch.sie_block->eca   = 0xC1002000U;
1563         if (sclp.has_siif)
1564                 vcpu->arch.sie_block->eca |= 1;
1565         if (sclp.has_sigpif)
1566                 vcpu->arch.sie_block->eca |= 0x10000000U;
1567         if (test_kvm_facility(vcpu->kvm, 129)) {
1568                 vcpu->arch.sie_block->eca |= 0x00020000;
1569                 vcpu->arch.sie_block->ecd |= 0x20000000;
1570         }
1571         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1572
1573         if (vcpu->kvm->arch.use_cmma) {
1574                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1575                 if (rc)
1576                         return rc;
1577         }
1578         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1579         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1580
1581         kvm_s390_vcpu_crypto_setup(vcpu);
1582
1583         return rc;
1584 }
1585
1586 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1587                                       unsigned int id)
1588 {
1589         struct kvm_vcpu *vcpu;
1590         struct sie_page *sie_page;
1591         int rc = -EINVAL;
1592
1593         if (!sca_can_add_vcpu(kvm, id))
1594                 goto out;
1595
1596         rc = -ENOMEM;
1597
1598         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1599         if (!vcpu)
1600                 goto out;
1601
1602         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1603         if (!sie_page)
1604                 goto out_free_cpu;
1605
1606         vcpu->arch.sie_block = &sie_page->sie_block;
1607         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1608
1609         vcpu->arch.sie_block->icpua = id;
1610         if (!kvm_is_ucontrol(kvm)) {
1611                 if (!kvm->arch.sca) {
1612                         WARN_ON_ONCE(1);
1613                         goto out_free_cpu;
1614                 }
1615                 sca_add_vcpu(vcpu, kvm, id);
1616         }
1617
1618         spin_lock_init(&vcpu->arch.local_int.lock);
1619         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1620         vcpu->arch.local_int.wq = &vcpu->wq;
1621         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1622
1623         /*
1624          * Allocate a save area for floating-point registers.  If the vector
1625          * extension is available, register contents are saved in the SIE
1626          * control block.  The allocated save area is still required in
1627          * particular places, for example, in kvm_s390_vcpu_store_status().
1628          */
1629         vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
1630                                                GFP_KERNEL);
1631         if (!vcpu->arch.guest_fpregs.fprs) {
1632                 rc = -ENOMEM;
1633                 goto out_free_sie_block;
1634         }
1635
1636         rc = kvm_vcpu_init(vcpu, kvm, id);
1637         if (rc)
1638                 goto out_free_sie_block;
1639         VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1640                  vcpu->arch.sie_block);
1641         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1642
1643         return vcpu;
1644 out_free_sie_block:
1645         free_page((unsigned long)(vcpu->arch.sie_block));
1646 out_free_cpu:
1647         kmem_cache_free(kvm_vcpu_cache, vcpu);
1648 out:
1649         return ERR_PTR(rc);
1650 }
1651
1652 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1653 {
1654         return kvm_s390_vcpu_has_irq(vcpu, 0);
1655 }
1656
1657 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1658 {
1659         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1660         exit_sie(vcpu);
1661 }
1662
1663 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1664 {
1665         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1666 }
1667
1668 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1669 {
1670         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1671         exit_sie(vcpu);
1672 }
1673
1674 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1675 {
1676         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1677 }
1678
1679 /*
1680  * Kick a guest cpu out of SIE and wait until SIE is not running.
1681  * If the CPU is not running (e.g. waiting while idle), the function
1682  * returns immediately. */
1683 void exit_sie(struct kvm_vcpu *vcpu)
1684 {
1685         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1686         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1687                 cpu_relax();
1688 }
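/*
 * Illustrative note, not part of the original file: exit_sie() pairs the
 * CPUSTAT_STOP_INT request with a busy-wait on the PROG_IN_SIE bit that the
 * SIE entry/exit path maintains in prog0c, so once the loop ends the VCPU is
 * guaranteed to be outside SIE.  A typical caller brackets its update with
 * the block/unblock helpers above, roughly:
 *
 *	kvm_s390_vcpu_block(vcpu);	set PROG_BLOCK_SIE and kick SIE
 *	... safely patch per-VCPU state such as sie_block fields ...
 *	kvm_s390_vcpu_unblock(vcpu);	allow SIE entry again
 */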
1689
1690 /* Kick a guest cpu out of SIE to process a request synchronously */
1691 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1692 {
1693         kvm_make_request(req, vcpu);
1694         kvm_s390_vcpu_request(vcpu);
1695 }
1696
1697 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1698 {
1699         int i;
1700         struct kvm *kvm = gmap->private;
1701         struct kvm_vcpu *vcpu;
1702
1703         kvm_for_each_vcpu(i, vcpu, kvm) {
1704                 /* match against both prefix pages */
1705                 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1706                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1707                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1708                 }
1709         }
1710 }
1711
1712 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1713 {
1714         /* kvm common code refers to this, but never calls it */
1715         BUG();
1716         return 0;
1717 }
1718
1719 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1720                                            struct kvm_one_reg *reg)
1721 {
1722         int r = -EINVAL;
1723
1724         switch (reg->id) {
1725         case KVM_REG_S390_TODPR:
1726                 r = put_user(vcpu->arch.sie_block->todpr,
1727                              (u32 __user *)reg->addr);
1728                 break;
1729         case KVM_REG_S390_EPOCHDIFF:
1730                 r = put_user(vcpu->arch.sie_block->epoch,
1731                              (u64 __user *)reg->addr);
1732                 break;
1733         case KVM_REG_S390_CPU_TIMER:
1734                 r = put_user(vcpu->arch.sie_block->cputm,
1735                              (u64 __user *)reg->addr);
1736                 break;
1737         case KVM_REG_S390_CLOCK_COMP:
1738                 r = put_user(vcpu->arch.sie_block->ckc,
1739                              (u64 __user *)reg->addr);
1740                 break;
1741         case KVM_REG_S390_PFTOKEN:
1742                 r = put_user(vcpu->arch.pfault_token,
1743                              (u64 __user *)reg->addr);
1744                 break;
1745         case KVM_REG_S390_PFCOMPARE:
1746                 r = put_user(vcpu->arch.pfault_compare,
1747                              (u64 __user *)reg->addr);
1748                 break;
1749         case KVM_REG_S390_PFSELECT:
1750                 r = put_user(vcpu->arch.pfault_select,
1751                              (u64 __user *)reg->addr);
1752                 break;
1753         case KVM_REG_S390_PP:
1754                 r = put_user(vcpu->arch.sie_block->pp,
1755                              (u64 __user *)reg->addr);
1756                 break;
1757         case KVM_REG_S390_GBEA:
1758                 r = put_user(vcpu->arch.sie_block->gbea,
1759                              (u64 __user *)reg->addr);
1760                 break;
1761         default:
1762                 break;
1763         }
1764
1765         return r;
1766 }
1767
1768 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1769                                            struct kvm_one_reg *reg)
1770 {
1771         int r = -EINVAL;
1772
1773         switch (reg->id) {
1774         case KVM_REG_S390_TODPR:
1775                 r = get_user(vcpu->arch.sie_block->todpr,
1776                              (u32 __user *)reg->addr);
1777                 break;
1778         case KVM_REG_S390_EPOCHDIFF:
1779                 r = get_user(vcpu->arch.sie_block->epoch,
1780                              (u64 __user *)reg->addr);
1781                 break;
1782         case KVM_REG_S390_CPU_TIMER:
1783                 r = get_user(vcpu->arch.sie_block->cputm,
1784                              (u64 __user *)reg->addr);
1785                 break;
1786         case KVM_REG_S390_CLOCK_COMP:
1787                 r = get_user(vcpu->arch.sie_block->ckc,
1788                              (u64 __user *)reg->addr);
1789                 break;
1790         case KVM_REG_S390_PFTOKEN:
1791                 r = get_user(vcpu->arch.pfault_token,
1792                              (u64 __user *)reg->addr);
1793                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1794                         kvm_clear_async_pf_completion_queue(vcpu);
1795                 break;
1796         case KVM_REG_S390_PFCOMPARE:
1797                 r = get_user(vcpu->arch.pfault_compare,
1798                              (u64 __user *)reg->addr);
1799                 break;
1800         case KVM_REG_S390_PFSELECT:
1801                 r = get_user(vcpu->arch.pfault_select,
1802                              (u64 __user *)reg->addr);
1803                 break;
1804         case KVM_REG_S390_PP:
1805                 r = get_user(vcpu->arch.sie_block->pp,
1806                              (u64 __user *)reg->addr);
1807                 break;
1808         case KVM_REG_S390_GBEA:
1809                 r = get_user(vcpu->arch.sie_block->gbea,
1810                              (u64 __user *)reg->addr);
1811                 break;
1812         default:
1813                 break;
1814         }
1815
1816         return r;
1817 }
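/*
 * Illustrative userspace sketch, not part of the original file: the ONE_REG
 * accessors above are driven with the generic KVM_GET_ONE_REG and
 * KVM_SET_ONE_REG vcpu ioctls (vcpu_fd is a placeholder for an open VCPU
 * file descriptor).  Reading the guest CPU timer, for example:
 *
 *	struct kvm_one_reg reg;
 *	__u64 cputm;
 *
 *	reg.id   = KVM_REG_S390_CPU_TIMER;
 *	reg.addr = (__u64)&cputm;
 *	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
 *		handle the error;
 */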
1818
1819 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1820 {
1821         kvm_s390_vcpu_initial_reset(vcpu);
1822         return 0;
1823 }
1824
1825 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1826 {
1827         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1828         return 0;
1829 }
1830
1831 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1832 {
1833         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1834         return 0;
1835 }
1836
1837 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1838                                   struct kvm_sregs *sregs)
1839 {
1840         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1841         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1842         restore_access_regs(vcpu->run->s.regs.acrs);
1843         return 0;
1844 }
1845
1846 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1847                                   struct kvm_sregs *sregs)
1848 {
1849         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1850         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1851         return 0;
1852 }
1853
1854 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1855 {
1856         if (test_fp_ctl(fpu->fpc))
1857                 return -EINVAL;
1858         memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1859         vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1860         save_fpu_regs();
1861         load_fpu_from(&vcpu->arch.guest_fpregs);
1862         return 0;
1863 }
1864
1865 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1866 {
1867         memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1868         fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1869         return 0;
1870 }
1871
1872 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1873 {
1874         int rc = 0;
1875
1876         if (!is_vcpu_stopped(vcpu))
1877                 rc = -EBUSY;
1878         else {
1879                 vcpu->run->psw_mask = psw.mask;
1880                 vcpu->run->psw_addr = psw.addr;
1881         }
1882         return rc;
1883 }
1884
1885 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1886                                   struct kvm_translation *tr)
1887 {
1888         return -EINVAL; /* not implemented yet */
1889 }
1890
1891 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1892                               KVM_GUESTDBG_USE_HW_BP | \
1893                               KVM_GUESTDBG_ENABLE)
1894
1895 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1896                                         struct kvm_guest_debug *dbg)
1897 {
1898         int rc = 0;
1899
1900         vcpu->guest_debug = 0;
1901         kvm_s390_clear_bp_data(vcpu);
1902
1903         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1904                 return -EINVAL;
1905
1906         if (dbg->control & KVM_GUESTDBG_ENABLE) {
1907                 vcpu->guest_debug = dbg->control;
1908                 /* enforce guest PER */
1909                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1910
1911                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1912                         rc = kvm_s390_import_bp_data(vcpu, dbg);
1913         } else {
1914                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1915                 vcpu->arch.guestdbg.last_bp = 0;
1916         }
1917
1918         if (rc) {
1919                 vcpu->guest_debug = 0;
1920                 kvm_s390_clear_bp_data(vcpu);
1921                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1922         }
1923
1924         return rc;
1925 }
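/*
 * Illustrative userspace sketch, not part of the original file: the debug
 * setup above is reached through KVM_SET_GUEST_DEBUG (vcpu_fd is a
 * placeholder).  Single-stepping only needs the control flags; hardware
 * breakpoints would additionally fill dbg.arch and set
 * KVM_GUESTDBG_USE_HW_BP:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg) < 0)
 *		handle the error;
 */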
1926
1927 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1928                                     struct kvm_mp_state *mp_state)
1929 {
1930         /* CHECK_STOP and LOAD are not supported yet */
1931         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1932                                        KVM_MP_STATE_OPERATING;
1933 }
1934
1935 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1936                                     struct kvm_mp_state *mp_state)
1937 {
1938         int rc = 0;
1939
1940         /* user space knows about this interface - let it control the state */
1941         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1942
1943         switch (mp_state->mp_state) {
1944         case KVM_MP_STATE_STOPPED:
1945                 kvm_s390_vcpu_stop(vcpu);
1946                 break;
1947         case KVM_MP_STATE_OPERATING:
1948                 kvm_s390_vcpu_start(vcpu);
1949                 break;
1950         case KVM_MP_STATE_LOAD:
1951         case KVM_MP_STATE_CHECK_STOP:
1952                 /* fall through - CHECK_STOP and LOAD are not supported yet */
1953         default:
1954                 rc = -ENXIO;
1955         }
1956
1957         return rc;
1958 }
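/*
 * Illustrative userspace sketch, not part of the original file: once user
 * space has issued KVM_SET_MP_STATE, user_cpu_state_ctrl is set and KVM_RUN
 * no longer starts a stopped VCPU on its own (see kvm_arch_vcpu_ioctl_run()).
 * Stopping a VCPU then looks like (vcpu_fd is a placeholder):
 *
 *	struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_STOPPED };
 *
 *	if (ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp) < 0)
 *		handle the error;
 */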
1959
1960 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1961 {
1962         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1963 }
1964
1965 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1966 {
1967 retry:
1968         kvm_s390_vcpu_request_handled(vcpu);
1969         if (!vcpu->requests)
1970                 return 0;
1971         /*
1972          * We use MMU_RELOAD just to re-arm the ipte notifier for the
1973          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1974          * This ensures that the ipte instruction for this request has
1975          * already finished. We might race against a second unmapper that
1976          * wants to set the blocking bit. Let's just retry the request loop.
1977          */
1978         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1979                 int rc;
1980                 rc = gmap_ipte_notify(vcpu->arch.gmap,
1981                                       kvm_s390_get_prefix(vcpu),
1982                                       PAGE_SIZE * 2);
1983                 if (rc)
1984                         return rc;
1985                 goto retry;
1986         }
1987
1988         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1989                 vcpu->arch.sie_block->ihcpu = 0xffff;
1990                 goto retry;
1991         }
1992
1993         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1994                 if (!ibs_enabled(vcpu)) {
1995                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1996                         atomic_or(CPUSTAT_IBS,
1997                                         &vcpu->arch.sie_block->cpuflags);
1998                 }
1999                 goto retry;
2000         }
2001
2002         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2003                 if (ibs_enabled(vcpu)) {
2004                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2005                         atomic_andnot(CPUSTAT_IBS,
2006                                           &vcpu->arch.sie_block->cpuflags);
2007                 }
2008                 goto retry;
2009         }
2010
2011         /* nothing to do, just clear the request */
2012         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2013
2014         return 0;
2015 }
2016
2017 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2018 {
2019         struct kvm_vcpu *vcpu;
2020         int i;
2021
2022         mutex_lock(&kvm->lock);
2023         preempt_disable();
2024         kvm->arch.epoch = tod - get_tod_clock();
2025         kvm_s390_vcpu_block_all(kvm);
2026         kvm_for_each_vcpu(i, vcpu, kvm)
2027                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2028         kvm_s390_vcpu_unblock_all(kvm);
2029         preempt_enable();
2030         mutex_unlock(&kvm->lock);
2031 }
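/*
 * Illustrative note, not part of the original file: the epoch stored above is
 * the signed difference between the requested guest TOD and the current host
 * TOD, so while the VCPU runs the guest effectively observes
 *
 *	guest_tod = host_tod + kvm->arch.epoch
 *
 * which is why every sie_block->epoch has to be refreshed under the
 * block/unblock bracket whenever the target TOD changes.
 */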
2032
2033 /**
2034  * kvm_arch_fault_in_page - fault-in guest page if necessary
2035  * @vcpu: The corresponding virtual cpu
2036  * @gpa: Guest physical address
2037  * @writable: Whether the page should be writable or not
2038  *
2039  * Make sure that a guest page has been faulted-in on the host.
2040  *
2041  * Return: Zero on success, negative error code otherwise.
2042  */
2043 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2044 {
2045         return gmap_fault(vcpu->arch.gmap, gpa,
2046                           writable ? FAULT_FLAG_WRITE : 0);
2047 }
2048
2049 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2050                                       unsigned long token)
2051 {
2052         struct kvm_s390_interrupt inti;
2053         struct kvm_s390_irq irq;
2054
2055         if (start_token) {
2056                 irq.u.ext.ext_params2 = token;
2057                 irq.type = KVM_S390_INT_PFAULT_INIT;
2058                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2059         } else {
2060                 inti.type = KVM_S390_INT_PFAULT_DONE;
2061                 inti.parm64 = token;
2062                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2063         }
2064 }
2065
2066 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2067                                      struct kvm_async_pf *work)
2068 {
2069         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2070         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2071 }
2072
2073 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2074                                  struct kvm_async_pf *work)
2075 {
2076         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2077         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2078 }
2079
2080 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2081                                struct kvm_async_pf *work)
2082 {
2083         /* s390 will always inject the page directly */
2084 }
2085
2086 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2087 {
2088         /*
2089          * s390 will always inject the page directly,
2090          * but we still want check_async_completion to clean up
2091          */
2092         return true;
2093 }
2094
2095 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2096 {
2097         hva_t hva;
2098         struct kvm_arch_async_pf arch;
2099         int rc;
2100
2101         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2102                 return 0;
2103         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2104             vcpu->arch.pfault_compare)
2105                 return 0;
2106         if (psw_extint_disabled(vcpu))
2107                 return 0;
2108         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2109                 return 0;
2110         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2111                 return 0;
2112         if (!vcpu->arch.gmap->pfault_enabled)
2113                 return 0;
2114
2115         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2116         hva += current->thread.gmap_addr & ~PAGE_MASK;
2117         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2118                 return 0;
2119
2120         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2121         return rc;
2122 }
2123
2124 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2125 {
2126         int rc, cpuflags;
2127
2128         /*
2129          * On s390 notifications for arriving pages will be delivered directly
2130          * to the guest but the housekeeping for completed pfaults is
2131          * handled outside the worker.
2132          */
2133         kvm_check_async_pf_completion(vcpu);
2134
2135         memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
2136
2137         if (need_resched())
2138                 schedule();
2139
2140         if (test_cpu_flag(CIF_MCCK_PENDING))
2141                 s390_handle_mcck();
2142
2143         if (!kvm_is_ucontrol(vcpu->kvm)) {
2144                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2145                 if (rc)
2146                         return rc;
2147         }
2148
2149         rc = kvm_s390_handle_requests(vcpu);
2150         if (rc)
2151                 return rc;
2152
2153         if (guestdbg_enabled(vcpu)) {
2154                 kvm_s390_backup_guest_per_regs(vcpu);
2155                 kvm_s390_patch_guest_per_regs(vcpu);
2156         }
2157
2158         vcpu->arch.sie_block->icptcode = 0;
2159         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2160         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2161         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2162
2163         return 0;
2164 }
2165
2166 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2167 {
2168         psw_t *psw = &vcpu->arch.sie_block->gpsw;
2169         u8 opcode;
2170         int rc;
2171
2172         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2173         trace_kvm_s390_sie_fault(vcpu);
2174
2175         /*
2176          * We want to inject an addressing exception, which is defined as a
2177          * suppressing or terminating exception. However, since we came here
2178          * by a DAT access exception, the PSW still points to the faulting
2179          * instruction since DAT exceptions are nullifying. So we've got
2180          * to look up the current opcode to get the length of the instruction
2181          * to be able to forward the PSW.
2182          */
2183         rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2184         if (rc)
2185                 return kvm_s390_inject_prog_cond(vcpu, rc);
2186         psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2187
2188         return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2189 }
2190
2191 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2192 {
2193         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2194                    vcpu->arch.sie_block->icptcode);
2195         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2196
2197         if (guestdbg_enabled(vcpu))
2198                 kvm_s390_restore_guest_per_regs(vcpu);
2199
2200         memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
2201
2202         if (vcpu->arch.sie_block->icptcode > 0) {
2203                 int rc = kvm_handle_sie_intercept(vcpu);
2204
2205                 if (rc != -EOPNOTSUPP)
2206                         return rc;
2207                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2208                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2209                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2210                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2211                 return -EREMOTE;
2212         } else if (exit_reason != -EFAULT) {
2213                 vcpu->stat.exit_null++;
2214                 return 0;
2215         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2216                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2217                 vcpu->run->s390_ucontrol.trans_exc_code =
2218                                                 current->thread.gmap_addr;
2219                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2220                 return -EREMOTE;
2221         } else if (current->thread.gmap_pfault) {
2222                 trace_kvm_s390_major_guest_pfault(vcpu);
2223                 current->thread.gmap_pfault = 0;
2224                 if (kvm_arch_setup_async_pf(vcpu))
2225                         return 0;
2226                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2227         }
2228         return vcpu_post_run_fault_in_sie(vcpu);
2229 }
2230
2231 static int __vcpu_run(struct kvm_vcpu *vcpu)
2232 {
2233         int rc, exit_reason;
2234
2235         /*
2236          * We try to hold kvm->srcu during most of vcpu_run (except when
2237          * running the guest), so that memslots (and other stuff) are protected
2238          */
2239         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2240
2241         do {
2242                 rc = vcpu_pre_run(vcpu);
2243                 if (rc)
2244                         break;
2245
2246                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2247                 /*
2248                  * As PF_VCPU will be used in the fault handler, there must
2249                  * be no uaccess between guest_enter and guest_exit.
2250                  */
2251                 local_irq_disable();
2252                 __kvm_guest_enter();
2253                 local_irq_enable();
2254                 exit_reason = sie64a(vcpu->arch.sie_block,
2255                                      vcpu->run->s.regs.gprs);
2256                 local_irq_disable();
2257                 __kvm_guest_exit();
2258                 local_irq_enable();
2259                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2260
2261                 rc = vcpu_post_run(vcpu, exit_reason);
2262         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2263
2264         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2265         return rc;
2266 }
2267
2268 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2269 {
2270         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2271         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2272         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2273                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2274         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2275                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2276                 /* some control register changes require a tlb flush */
2277                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2278         }
2279         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2280                 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2281                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2282                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2283                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2284                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2285         }
2286         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2287                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2288                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2289                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2290                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2291                         kvm_clear_async_pf_completion_queue(vcpu);
2292         }
2293         kvm_run->kvm_dirty_regs = 0;
2294 }
2295
2296 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2297 {
2298         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2299         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2300         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2301         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2302         kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2303         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2304         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2305         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2306         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2307         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2308         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2309         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2310 }
2311
2312 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2313 {
2314         int rc;
2315         sigset_t sigsaved;
2316
2317         if (guestdbg_exit_pending(vcpu)) {
2318                 kvm_s390_prepare_debug_exit(vcpu);
2319                 return 0;
2320         }
2321
2322         if (vcpu->sigset_active)
2323                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2324
2325         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2326                 kvm_s390_vcpu_start(vcpu);
2327         } else if (is_vcpu_stopped(vcpu)) {
2328                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2329                                    vcpu->vcpu_id);
2330                 return -EINVAL;
2331         }
2332
2333         sync_regs(vcpu, kvm_run);
2334
2335         might_fault();
2336         rc = __vcpu_run(vcpu);
2337
2338         if (signal_pending(current) && !rc) {
2339                 kvm_run->exit_reason = KVM_EXIT_INTR;
2340                 rc = -EINTR;
2341         }
2342
2343         if (guestdbg_exit_pending(vcpu) && !rc)  {
2344                 kvm_s390_prepare_debug_exit(vcpu);
2345                 rc = 0;
2346         }
2347
2348         if (rc == -EREMOTE) {
2349                 /* userspace support is needed, kvm_run has been prepared */
2350                 rc = 0;
2351         }
2352
2353         store_regs(vcpu, kvm_run);
2354
2355         if (vcpu->sigset_active)
2356                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2357
2358         vcpu->stat.exit_userspace++;
2359         return rc;
2360 }
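/*
 * Illustrative userspace sketch, not part of the original file: the typical
 * caller of the run path above mmap()s the shared kvm_run area and loops on
 * KVM_RUN, reacting to the exit_reason values prepared by vcpu_post_run()
 * (kvm_fd and vcpu_fd are placeholders):
 *
 *	long run_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
 *	struct kvm_run *run = mmap(NULL, run_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
 *			break;
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			hand the intercept to the machine model;
 *	}
 */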
2361
2362 /*
2363  * store status at address
2364  * we have two special cases:
2365  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2366  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2367  */
2368 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2369 {
2370         unsigned char archmode = 1;
2371         unsigned int px;
2372         u64 clkcomp;
2373         int rc;
2374
2375         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2376                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2377                         return -EFAULT;
2378                 gpa = SAVE_AREA_BASE;
2379         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2380                 if (write_guest_real(vcpu, 163, &archmode, 1))
2381                         return -EFAULT;
2382                 gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2383         }
2384         rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2385                              vcpu->arch.guest_fpregs.fprs, 128);
2386         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2387                               vcpu->run->s.regs.gprs, 128);
2388         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2389                               &vcpu->arch.sie_block->gpsw, 16);
2390         px = kvm_s390_get_prefix(vcpu);
2391         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2392                               &px, 4);
2393         rc |= write_guest_abs(vcpu,
2394                               gpa + offsetof(struct save_area, fp_ctrl_reg),
2395                               &vcpu->arch.guest_fpregs.fpc, 4);
2396         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2397                               &vcpu->arch.sie_block->todpr, 4);
2398         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2399                               &vcpu->arch.sie_block->cputm, 8);
2400         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2401         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2402                               &clkcomp, 8);
2403         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2404                               &vcpu->run->s.regs.acrs, 64);
2405         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2406                               &vcpu->arch.sie_block->gcr, 128);
2407         return rc ? -EFAULT : 0;
2408 }
2409
2410 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2411 {
2412         /*
2413          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2414          * copying in vcpu load/put. Let's update our copies before we save
2415          * them into the save area.
2416          */
2417         save_fpu_regs();
2418         if (test_kvm_facility(vcpu->kvm, 129)) {
2419                 /*
2420                  * If the vector extension is available, the vector registers
2421                  * which overlap with the floating-point registers are saved in
2422                  * the SIE-control block.  Hence, extract the floating-point
2423                  * registers and the FPC value and store them in the
2424                  * guest_fpregs structure.
2425                  */
2426                 vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
2427                 convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
2428                                  current->thread.fpu.vxrs);
2429         } else
2430                 save_fpu_to(&vcpu->arch.guest_fpregs);
2431         save_access_regs(vcpu->run->s.regs.acrs);
2432
2433         return kvm_s390_store_status_unloaded(vcpu, addr);
2434 }
2435
2436 /*
2437  * store additional status at address
2438  */
2439 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2440                                         unsigned long gpa)
2441 {
2442         /* Only bits 0-53 are used for address formation */
2443         if (!(gpa & ~0x3ff))
2444                 return 0;
2445
2446         return write_guest_abs(vcpu, gpa & ~0x3ff,
2447                                (void *)&vcpu->run->s.regs.vrs, 512);
2448 }
2449
2450 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2451 {
2452         if (!test_kvm_facility(vcpu->kvm, 129))
2453                 return 0;
2454
2455         /*
2456          * The guest VXRS are in the host VXRS due to the lazy
2457          * copying in vcpu load/put. We can simply call save_fpu_regs()
2458          * to save the current register state because we are in the
2459          * middle of a load/put cycle.
2460          *
2461          * Let's update our copies before we save it into the save area.
2462          */
2463         save_fpu_regs();
2464
2465         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2466 }
2467
2468 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2469 {
2470         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2471         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2472 }
2473
2474 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2475 {
2476         unsigned int i;
2477         struct kvm_vcpu *vcpu;
2478
2479         kvm_for_each_vcpu(i, vcpu, kvm) {
2480                 __disable_ibs_on_vcpu(vcpu);
2481         }
2482 }
2483
2484 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2485 {
2486         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2487         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2488 }
2489
2490 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2491 {
2492         int i, online_vcpus, started_vcpus = 0;
2493
2494         if (!is_vcpu_stopped(vcpu))
2495                 return;
2496
2497         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2498         /* Only one cpu at a time may enter/leave the STOPPED state. */
2499         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2500         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2501
2502         for (i = 0; i < online_vcpus; i++) {
2503                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2504                         started_vcpus++;
2505         }
2506
2507         if (started_vcpus == 0) {
2508                 /* we're the only active VCPU -> speed it up */
2509                 __enable_ibs_on_vcpu(vcpu);
2510         } else if (started_vcpus == 1) {
2511                 /*
2512                  * As we are starting a second VCPU, we have to disable
2513                  * the IBS facility on all VCPUs to remove potentially
2514                  * outstanding ENABLE requests.
2515                  */
2516                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2517         }
2518
2519         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2520         /*
2521          * Another VCPU might have used IBS while we were offline.
2522          * Let's play safe and flush the VCPU at startup.
2523          */
2524         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2525         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2526         return;
2527 }
2528
2529 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2530 {
2531         int i, online_vcpus, started_vcpus = 0;
2532         struct kvm_vcpu *started_vcpu = NULL;
2533
2534         if (is_vcpu_stopped(vcpu))
2535                 return;
2536
2537         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2538         /* Only one cpu at a time may enter/leave the STOPPED state. */
2539         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2540         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2541
2542         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2543         kvm_s390_clear_stop_irq(vcpu);
2544
2545         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2546         __disable_ibs_on_vcpu(vcpu);
2547
2548         for (i = 0; i < online_vcpus; i++) {
2549                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2550                         started_vcpus++;
2551                         started_vcpu = vcpu->kvm->vcpus[i];
2552                 }
2553         }
2554
2555         if (started_vcpus == 1) {
2556                 /*
2557                  * As we only have one VCPU left, we want to enable the
2558                  * IBS facility for that VCPU to speed it up.
2559                  */
2560                 __enable_ibs_on_vcpu(started_vcpu);
2561         }
2562
2563         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2564         return;
2565 }
2566
2567 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2568                                      struct kvm_enable_cap *cap)
2569 {
2570         int r;
2571
2572         if (cap->flags)
2573                 return -EINVAL;
2574
2575         switch (cap->cap) {
2576         case KVM_CAP_S390_CSS_SUPPORT:
2577                 if (!vcpu->kvm->arch.css_support) {
2578                         vcpu->kvm->arch.css_support = 1;
2579                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2580                         trace_kvm_s390_enable_css(vcpu->kvm);
2581                 }
2582                 r = 0;
2583                 break;
2584         default:
2585                 r = -EINVAL;
2586                 break;
2587         }
2588         return r;
2589 }
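/*
 * Illustrative userspace sketch, not part of the original file: a channel-I/O
 * machine model turns on the CSS handling enabled above through the generic
 * capability ioctl on the VCPU file descriptor (vcpu_fd is a placeholder):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *
 *	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
 *		handle the error;
 */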
2590
2591 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2592                                   struct kvm_s390_mem_op *mop)
2593 {
2594         void __user *uaddr = (void __user *)mop->buf;
2595         void *tmpbuf = NULL;
2596         int r, srcu_idx;
2597         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2598                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2599
2600         if (mop->flags & ~supported_flags)
2601                 return -EINVAL;
2602
2603         if (mop->size > MEM_OP_MAX_SIZE)
2604                 return -E2BIG;
2605
2606         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2607                 tmpbuf = vmalloc(mop->size);
2608                 if (!tmpbuf)
2609                         return -ENOMEM;
2610         }
2611
2612         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2613
2614         switch (mop->op) {
2615         case KVM_S390_MEMOP_LOGICAL_READ:
2616                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2617                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2618                         break;
2619                 }
2620                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2621                 if (r == 0) {
2622                         if (copy_to_user(uaddr, tmpbuf, mop->size))
2623                                 r = -EFAULT;
2624                 }
2625                 break;
2626         case KVM_S390_MEMOP_LOGICAL_WRITE:
2627                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2628                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2629                         break;
2630                 }
2631                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2632                         r = -EFAULT;
2633                         break;
2634                 }
2635                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2636                 break;
2637         default:
2638                 r = -EINVAL;
2639         }
2640
2641         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2642
2643         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2644                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2645
2646         vfree(tmpbuf);
2647         return r;
2648 }
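/*
 * Illustrative userspace sketch, not part of the original file: reading guest
 * memory through the logical-address path above (including DAT translation)
 * uses the KVM_S390_MEM_OP vcpu ioctl; guest_addr, len and buffer are
 * placeholders:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,			at most MEM_OP_MAX_SIZE
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buffer,
 *		.ar    = 0,			access register number
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		handle the error or the injected exception;
 */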
2649
2650 long kvm_arch_vcpu_ioctl(struct file *filp,
2651                          unsigned int ioctl, unsigned long arg)
2652 {
2653         struct kvm_vcpu *vcpu = filp->private_data;
2654         void __user *argp = (void __user *)arg;
2655         int idx;
2656         long r;
2657
2658         switch (ioctl) {
2659         case KVM_S390_IRQ: {
2660                 struct kvm_s390_irq s390irq;
2661
2662                 r = -EFAULT;
2663                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2664                         break;
2665                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2666                 break;
2667         }
2668         case KVM_S390_INTERRUPT: {
2669                 struct kvm_s390_interrupt s390int;
2670                 struct kvm_s390_irq s390irq;
2671
2672                 r = -EFAULT;
2673                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2674                         break;
2675                 if (s390int_to_s390irq(&s390int, &s390irq))
2676                         return -EINVAL;
2677                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2678                 break;
2679         }
2680         case KVM_S390_STORE_STATUS:
2681                 idx = srcu_read_lock(&vcpu->kvm->srcu);
2682                 r = kvm_s390_vcpu_store_status(vcpu, arg);
2683                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2684                 break;
2685         case KVM_S390_SET_INITIAL_PSW: {
2686                 psw_t psw;
2687
2688                 r = -EFAULT;
2689                 if (copy_from_user(&psw, argp, sizeof(psw)))
2690                         break;
2691                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2692                 break;
2693         }
2694         case KVM_S390_INITIAL_RESET:
2695                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2696                 break;
2697         case KVM_SET_ONE_REG:
2698         case KVM_GET_ONE_REG: {
2699                 struct kvm_one_reg reg;
2700                 r = -EFAULT;
2701                 if (copy_from_user(&reg, argp, sizeof(reg)))
2702                         break;
2703                 if (ioctl == KVM_SET_ONE_REG)
2704                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2705                 else
2706                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2707                 break;
2708         }
2709 #ifdef CONFIG_KVM_S390_UCONTROL
2710         case KVM_S390_UCAS_MAP: {
2711                 struct kvm_s390_ucas_mapping ucasmap;
2712
2713                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2714                         r = -EFAULT;
2715                         break;
2716                 }
2717
2718                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2719                         r = -EINVAL;
2720                         break;
2721                 }
2722
2723                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2724                                      ucasmap.vcpu_addr, ucasmap.length);
2725                 break;
2726         }
2727         case KVM_S390_UCAS_UNMAP: {
2728                 struct kvm_s390_ucas_mapping ucasmap;
2729
2730                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2731                         r = -EFAULT;
2732                         break;
2733                 }
2734
2735                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2736                         r = -EINVAL;
2737                         break;
2738                 }
2739
2740                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2741                         ucasmap.length);
2742                 break;
2743         }
2744 #endif
2745         case KVM_S390_VCPU_FAULT: {
2746                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2747                 break;
2748         }
2749         case KVM_ENABLE_CAP:
2750         {
2751                 struct kvm_enable_cap cap;
2752                 r = -EFAULT;
2753                 if (copy_from_user(&cap, argp, sizeof(cap)))
2754                         break;
2755                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2756                 break;
2757         }
2758         case KVM_S390_MEM_OP: {
2759                 struct kvm_s390_mem_op mem_op;
2760
2761                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2762                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2763                 else
2764                         r = -EFAULT;
2765                 break;
2766         }
2767         case KVM_S390_SET_IRQ_STATE: {
2768                 struct kvm_s390_irq_state irq_state;
2769
2770                 r = -EFAULT;
2771                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2772                         break;
2773                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2774                     irq_state.len == 0 ||
2775                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2776                         r = -EINVAL;
2777                         break;
2778                 }
2779                 r = kvm_s390_set_irq_state(vcpu,
2780                                            (void __user *) irq_state.buf,
2781                                            irq_state.len);
2782                 break;
2783         }
2784         case KVM_S390_GET_IRQ_STATE: {
2785                 struct kvm_s390_irq_state irq_state;
2786
2787                 r = -EFAULT;
2788                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2789                         break;
2790                 if (irq_state.len == 0) {
2791                         r = -EINVAL;
2792                         break;
2793                 }
2794                 r = kvm_s390_get_irq_state(vcpu,
2795                                            (__u8 __user *)  irq_state.buf,
2796                                            irq_state.len);
2797                 break;
2798         }
2799         default:
2800                 r = -ENOTTY;
2801         }
2802         return r;
2803 }
2804
2805 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2806 {
2807 #ifdef CONFIG_KVM_S390_UCONTROL
2808         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2809                  && (kvm_is_ucontrol(vcpu->kvm))) {
2810                 vmf->page = virt_to_page(vcpu->arch.sie_block);
2811                 get_page(vmf->page);
2812                 return 0;
2813         }
2814 #endif
2815         return VM_FAULT_SIGBUS;
2816 }
2817
2818 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2819                             unsigned long npages)
2820 {
2821         return 0;
2822 }
2823
2824 /* Section: memory related */
2825 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2826                                    struct kvm_memory_slot *memslot,
2827                                    const struct kvm_userspace_memory_region *mem,
2828                                    enum kvm_mr_change change)
2829 {
2830         /* A few sanity checks. Memory slots have to start and end on a
2831            segment boundary (1MB). The memory in userland may be fragmented
2832            into various different vmas. It is okay to mmap() and munmap()
2833            parts of this slot at any time after this call. */
2834
2835         if (mem->userspace_addr & 0xffffful)
2836                 return -EINVAL;
2837
2838         if (mem->memory_size & 0xffffful)
2839                 return -EINVAL;
2840
2841         return 0;
2842 }
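/*
 * Illustrative userspace sketch, not part of the original file: the alignment
 * rules checked above mean that a slot registered with
 * KVM_SET_USER_MEMORY_REGION needs a 1MB-aligned backing address and size
 * (vm_fd, backing and guest_size are placeholders):
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot            = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = guest_size,
 *		.userspace_addr  = (__u64)backing,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region) < 0)
 *		handle the error;
 */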
2843
2844 void kvm_arch_commit_memory_region(struct kvm *kvm,
2845                                 const struct kvm_userspace_memory_region *mem,
2846                                 const struct kvm_memory_slot *old,
2847                                 const struct kvm_memory_slot *new,
2848                                 enum kvm_mr_change change)
2849 {
2850         int rc;
2851
2852         /* If the basics of the memslot do not change, we do not want
2853          * to update the gmap. Every update causes several unnecessary
2854          * segment translation exceptions. This is usually handled just
2855          * fine by the normal fault handler + gmap, but it will also
2856          * cause faults on the prefix page of running guest CPUs.
2857          */
2858         if (old->userspace_addr == mem->userspace_addr &&
2859             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2860             old->npages * PAGE_SIZE == mem->memory_size)
2861                 return;
2862
2863         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2864                 mem->guest_phys_addr, mem->memory_size);
2865         if (rc)
2866                 pr_warn("failed to commit memory region\n");
2867         return;
2868 }
2869
2870 static int __init kvm_s390_init(void)
2871 {
2872         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2873 }
2874
2875 static void __exit kvm_s390_exit(void)
2876 {
2877         kvm_exit();
2878 }
2879
2880 module_init(kvm_s390_init);
2881 module_exit(kvm_s390_exit);
2882
2883 /*
2884  * Enable autoloading of the kvm module.
2885  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2886  * since x86 takes a different approach.
2887  */
2888 #include <linux/miscdevice.h>
2889 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2890 MODULE_ALIAS("devname:kvm");