2 * hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008, 2009
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
17 #include <linux/compiler.h>
18 #include <linux/err.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <asm/asm-offsets.h>
32 #include <asm/lowcore.h>
34 #include <asm/pgtable.h>
37 #include <asm/switch_to.h>
40 #include <asm/cpacf.h>
41 #include <asm/timex.h>
45 #define KMSG_COMPONENT "kvm-s390"
47 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49 #define CREATE_TRACE_POINTS
51 #include "trace-s390.h"
53 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
56 (KVM_MAX_VCPUS + LOCAL_IRQS))
58 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61 { "userspace_handled", VCPU_STAT(exit_userspace) },
62 { "exit_null", VCPU_STAT(exit_null) },
63 { "exit_validity", VCPU_STAT(exit_validity) },
64 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
65 { "exit_external_request", VCPU_STAT(exit_external_request) },
66 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
67 { "exit_instruction", VCPU_STAT(exit_instruction) },
68 { "exit_pei", VCPU_STAT(exit_pei) },
69 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
70 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
71 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
72 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
73 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
74 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
75 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
76 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
77 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
78 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
79 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
80 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
81 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
82 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
83 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
84 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
85 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
86 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
87 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
88 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
89 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
90 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
91 { "instruction_spx", VCPU_STAT(instruction_spx) },
92 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
93 { "instruction_stap", VCPU_STAT(instruction_stap) },
94 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
95 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
96 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
97 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
98 { "instruction_essa", VCPU_STAT(instruction_essa) },
99 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
100 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
101 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
102 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
103 { "instruction_sie", VCPU_STAT(instruction_sie) },
104 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
105 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
106 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
107 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
108 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
109 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
110 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
111 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
112 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
113 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
114 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
115 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
116 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
117 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
118 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
119 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
120 { "diagnose_10", VCPU_STAT(diagnose_10) },
121 { "diagnose_44", VCPU_STAT(diagnose_44) },
122 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
123 { "diagnose_258", VCPU_STAT(diagnose_258) },
124 { "diagnose_308", VCPU_STAT(diagnose_308) },
125 { "diagnose_500", VCPU_STAT(diagnose_500) },
129 /* allow nested virtualization in KVM (if enabled by user space) */
131 module_param(nested, int, S_IRUGO);
132 MODULE_PARM_DESC(nested, "Nested virtualization support");
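/*
 * "nested" is declared read-only at runtime (S_IRUGO), so it has to be
 * given at load time, e.g. "modprobe kvm nested=1"; the current value
 * can be inspected via /sys/module/kvm/parameters/nested.
 */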
134 /* upper facilities limit for kvm */
135 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
137 unsigned long kvm_s390_fac_list_mask_size(void)
139 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
140 return ARRAY_SIZE(kvm_s390_fac_list_mask);
143 /* available cpu features supported by kvm */
144 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
145 /* available subfunctions indicated via query / "test bit" */
146 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
148 static struct gmap_notifier gmap_notifier;
149 static struct gmap_notifier vsie_gmap_notifier;
150 debug_info_t *kvm_s390_dbf;
152 /* Section: not file related */
153 int kvm_arch_hardware_enable(void)
155 /* every s390 is virtualization enabled ;-) */
159 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
163 * This callback is executed during stop_machine(). All CPUs are therefore
164 * temporarily stopped. In order not to change guest behavior, we have to
165 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
166 * so a CPU won't be stopped while calculating with the epoch.
168 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
172 struct kvm_vcpu *vcpu;
174 unsigned long long *delta = v;
176 list_for_each_entry(kvm, &vm_list, vm_list) {
177 kvm->arch.epoch -= *delta;
178 kvm_for_each_vcpu(i, vcpu, kvm) {
179 vcpu->arch.sie_block->epoch -= *delta;
180 if (vcpu->arch.cputm_enabled)
181 vcpu->arch.cputm_start += *delta;
182 if (vcpu->arch.vsie_block)
183 vcpu->arch.vsie_block->epoch -= *delta;
189 static struct notifier_block kvm_clock_notifier = {
190 .notifier_call = kvm_clock_sync,
193 int kvm_arch_hardware_setup(void)
195 gmap_notifier.notifier_call = kvm_gmap_notifier;
196 gmap_register_pte_notifier(&gmap_notifier);
197 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
198 gmap_register_pte_notifier(&vsie_gmap_notifier);
199 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
200 &kvm_clock_notifier);
204 void kvm_arch_hardware_unsetup(void)
206 gmap_unregister_pte_notifier(&gmap_notifier);
207 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
208 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
209 &kvm_clock_notifier);
212 static void allow_cpu_feat(unsigned long nr)
214 set_bit_inv(nr, kvm_s390_available_cpu_feat);
217 static inline int plo_test_bit(unsigned char nr)
219 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
220 int cc = 3; /* subfunction not available */
223 /* Parameter registers are ignored for "test bit" */
233 static void kvm_s390_cpu_feat_init(void)
237 for (i = 0; i < 256; ++i) {
239 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
242 if (test_facility(28)) /* TOD-clock steering */
243 ptff(kvm_s390_available_subfunc.ptff,
244 sizeof(kvm_s390_available_subfunc.ptff),
247 if (test_facility(17)) { /* MSA */
248 __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
249 __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
250 __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
251 __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
252 __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
254 if (test_facility(76)) /* MSA3 */
255 __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
256 if (test_facility(77)) { /* MSA4 */
257 __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
258 __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
259 __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
260 __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
262 if (test_facility(57)) /* MSA5 */
263 __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
265 if (MACHINE_HAS_ESOP)
266 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
268 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
269 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
271 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
272 !test_facility(3) || !nested)
274 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
275 if (sclp.has_64bscao)
276 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
278 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
280 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
282 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
284 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
286 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
288 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
290 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
291 * all skey handling functions read/set the skey from the PGSTE
292 * instead of the real storage key.
294 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
295 * pages to be detected as preserved although they are resident.
297 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
298 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
300 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
301 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
302 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
304 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
305 * cannot easily shadow the SCA because of the ipte lock.
309 int kvm_arch_init(void *opaque)
311 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
315 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
316 debug_unregister(kvm_s390_dbf);
320 kvm_s390_cpu_feat_init();
322 /* Register floating interrupt controller interface. */
323 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
326 void kvm_arch_exit(void)
328 debug_unregister(kvm_s390_dbf);
331 /* Section: device related */
332 long kvm_arch_dev_ioctl(struct file *filp,
333 unsigned int ioctl, unsigned long arg)
335 if (ioctl == KVM_S390_ENABLE_SIE)
336 return s390_enable_sie();
340 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
345 case KVM_CAP_S390_PSW:
346 case KVM_CAP_S390_GMAP:
347 case KVM_CAP_SYNC_MMU:
348 #ifdef CONFIG_KVM_S390_UCONTROL
349 case KVM_CAP_S390_UCONTROL:
351 case KVM_CAP_ASYNC_PF:
352 case KVM_CAP_SYNC_REGS:
353 case KVM_CAP_ONE_REG:
354 case KVM_CAP_ENABLE_CAP:
355 case KVM_CAP_S390_CSS_SUPPORT:
356 case KVM_CAP_IOEVENTFD:
357 case KVM_CAP_DEVICE_CTRL:
358 case KVM_CAP_ENABLE_CAP_VM:
359 case KVM_CAP_S390_IRQCHIP:
360 case KVM_CAP_VM_ATTRIBUTES:
361 case KVM_CAP_MP_STATE:
362 case KVM_CAP_S390_INJECT_IRQ:
363 case KVM_CAP_S390_USER_SIGP:
364 case KVM_CAP_S390_USER_STSI:
365 case KVM_CAP_S390_SKEYS:
366 case KVM_CAP_S390_IRQ_STATE:
367 case KVM_CAP_S390_USER_INSTR0:
370 case KVM_CAP_S390_MEM_OP:
373 case KVM_CAP_NR_VCPUS:
374 case KVM_CAP_MAX_VCPUS:
375 r = KVM_S390_BSCA_CPU_SLOTS;
376 if (!kvm_s390_use_sca_entries())
378 else if (sclp.has_esca && sclp.has_64bscao)
379 r = KVM_S390_ESCA_CPU_SLOTS;
381 case KVM_CAP_NR_MEMSLOTS:
382 r = KVM_USER_MEM_SLOTS;
384 case KVM_CAP_S390_COW:
385 r = MACHINE_HAS_ESOP;
387 case KVM_CAP_S390_VECTOR_REGISTERS:
390 case KVM_CAP_S390_RI:
391 r = test_facility(64);
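/*
 * A minimal user-space sketch of probing one of the capabilities
 * handled above; "vm_fd" (an open VM file descriptor) is an assumption:
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	int max_vcpus = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 *	if (max_vcpus < 0)
 *		perror("KVM_CHECK_EXTENSION");
 */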
399 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
400 struct kvm_memory_slot *memslot)
402 gfn_t cur_gfn, last_gfn;
403 unsigned long address;
404 struct gmap *gmap = kvm->arch.gmap;
406 /* Loop over all guest pages */
407 last_gfn = memslot->base_gfn + memslot->npages;
408 for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
409 address = gfn_to_hva_memslot(memslot, cur_gfn);
411 if (test_and_clear_guest_dirty(gmap->mm, address))
412 mark_page_dirty(kvm, cur_gfn);
413 if (fatal_signal_pending(current))
419 /* Section: vm related */
420 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
423 * Get (and clear) the dirty memory log for a memory slot.
425 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
426 struct kvm_dirty_log *log)
430 struct kvm_memslots *slots;
431 struct kvm_memory_slot *memslot;
434 mutex_lock(&kvm->slots_lock);
437 if (log->slot >= KVM_USER_MEM_SLOTS)
440 slots = kvm_memslots(kvm);
441 memslot = id_to_memslot(slots, log->slot);
443 if (!memslot->dirty_bitmap)
446 kvm_s390_sync_dirty_log(kvm, memslot);
447 r = kvm_get_dirty_log(kvm, log, &is_dirty);
451 /* Clear the dirty log */
453 n = kvm_dirty_bitmap_bytes(memslot);
454 memset(memslot->dirty_bitmap, 0, n);
458 mutex_unlock(&kvm->slots_lock);
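/*
 * Rough user-space counterpart (a sketch; "vm_fd" and the bitmap sizing
 * are assumptions, one bit per page in the slot):
 *
 *	struct kvm_dirty_log log = {
 *		.slot = 0,
 *		.dirty_bitmap = bitmap,
 *	};
 *	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0)
 *		perror("KVM_GET_DIRTY_LOG");
 */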
462 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
465 struct kvm_vcpu *vcpu;
467 kvm_for_each_vcpu(i, vcpu, kvm) {
468 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
472 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
480 case KVM_CAP_S390_IRQCHIP:
481 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
482 kvm->arch.use_irqchip = 1;
485 case KVM_CAP_S390_USER_SIGP:
486 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
487 kvm->arch.user_sigp = 1;
490 case KVM_CAP_S390_VECTOR_REGISTERS:
491 mutex_lock(&kvm->lock);
492 if (kvm->created_vcpus) {
494 } else if (MACHINE_HAS_VX) {
495 set_kvm_facility(kvm->arch.model.fac_mask, 129);
496 set_kvm_facility(kvm->arch.model.fac_list, 129);
500 mutex_unlock(&kvm->lock);
501 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
502 r ? "(not available)" : "(success)");
504 case KVM_CAP_S390_RI:
506 mutex_lock(&kvm->lock);
507 if (kvm->created_vcpus) {
509 } else if (test_facility(64)) {
510 set_kvm_facility(kvm->arch.model.fac_mask, 64);
511 set_kvm_facility(kvm->arch.model.fac_list, 64);
514 mutex_unlock(&kvm->lock);
515 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
516 r ? "(not available)" : "(success)");
518 case KVM_CAP_S390_USER_STSI:
519 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
520 kvm->arch.user_stsi = 1;
523 case KVM_CAP_S390_USER_INSTR0:
524 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
525 kvm->arch.user_instr0 = 1;
526 icpt_operexc_on_all_vcpus(kvm);
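/*
 * Sketch of how user space enables one of the capabilities above
 * ("vm_fd" is an assumed open VM file descriptor):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
 *		perror("KVM_ENABLE_CAP");
 */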
536 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
540 switch (attr->attr) {
541 case KVM_S390_VM_MEM_LIMIT_SIZE:
543 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
544 kvm->arch.mem_limit);
545 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
555 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
559 switch (attr->attr) {
560 case KVM_S390_VM_MEM_ENABLE_CMMA:
566 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
567 mutex_lock(&kvm->lock);
568 if (!kvm->created_vcpus) {
569 kvm->arch.use_cmma = 1;
572 mutex_unlock(&kvm->lock);
574 case KVM_S390_VM_MEM_CLR_CMMA:
579 if (!kvm->arch.use_cmma)
582 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
583 mutex_lock(&kvm->lock);
584 idx = srcu_read_lock(&kvm->srcu);
585 s390_reset_cmma(kvm->arch.gmap->mm);
586 srcu_read_unlock(&kvm->srcu, idx);
587 mutex_unlock(&kvm->lock);
590 case KVM_S390_VM_MEM_LIMIT_SIZE: {
591 unsigned long new_limit;
593 if (kvm_is_ucontrol(kvm))
596 if (get_user(new_limit, (u64 __user *)attr->addr))
599 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
600 new_limit > kvm->arch.mem_limit)
606 /* gmap_create takes last usable address */
607 if (new_limit != KVM_S390_NO_MEM_LIMIT)
611 mutex_lock(&kvm->lock);
612 if (!kvm->created_vcpus) {
613 /* gmap_create will round the limit up */
614 struct gmap *new = gmap_create(current->mm, new_limit);
619 gmap_remove(kvm->arch.gmap);
621 kvm->arch.gmap = new;
625 mutex_unlock(&kvm->lock);
626 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
627 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
628 (void *) kvm->arch.gmap->asce);
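/*
 * These memory attributes are driven through the generic device-attribute
 * ioctl. A sketch that caps guest memory at 2 GiB ("vm_fd" is an
 * assumption):
 *
 *	__u64 limit = 1ULL << 31;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)&limit,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */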
638 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
640 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
642 struct kvm_vcpu *vcpu;
645 if (!test_kvm_facility(kvm, 76))
648 mutex_lock(&kvm->lock);
649 switch (attr->attr) {
650 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
652 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
653 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
654 kvm->arch.crypto.aes_kw = 1;
655 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
657 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
659 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
660 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
661 kvm->arch.crypto.dea_kw = 1;
662 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
664 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
665 kvm->arch.crypto.aes_kw = 0;
666 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
667 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
668 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
670 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
671 kvm->arch.crypto.dea_kw = 0;
672 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
673 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
674 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
677 mutex_unlock(&kvm->lock);
681 kvm_for_each_vcpu(i, vcpu, kvm) {
682 kvm_s390_vcpu_crypto_setup(vcpu);
685 mutex_unlock(&kvm->lock);
689 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
693 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
699 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
704 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
708 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
711 kvm_s390_set_tod_clock(kvm, gtod);
712 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
716 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
723 switch (attr->attr) {
724 case KVM_S390_VM_TOD_HIGH:
725 ret = kvm_s390_set_tod_high(kvm, attr);
727 case KVM_S390_VM_TOD_LOW:
728 ret = kvm_s390_set_tod_low(kvm, attr);
737 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
741 if (copy_to_user((void __user *)attr->addr, &gtod_high,
744 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
749 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
753 gtod = kvm_s390_get_tod_clock_fast(kvm);
754 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
756 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
761 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
768 switch (attr->attr) {
769 case KVM_S390_VM_TOD_HIGH:
770 ret = kvm_s390_get_tod_high(kvm, attr);
772 case KVM_S390_VM_TOD_LOW:
773 ret = kvm_s390_get_tod_low(kvm, attr);
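/*
 * Reading the guest TOD from user space follows the same device-attribute
 * pattern (sketch; "vm_fd" is an assumption):
 *
 *	__u64 tod;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)&tod,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */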
782 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
784 struct kvm_s390_vm_cpu_processor *proc;
785 u16 lowest_ibc, unblocked_ibc;
788 mutex_lock(&kvm->lock);
789 if (kvm->created_vcpus) {
793 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
798 if (!copy_from_user(proc, (void __user *)attr->addr,
800 kvm->arch.model.cpuid = proc->cpuid;
801 lowest_ibc = sclp.ibc >> 16 & 0xfff;
802 unblocked_ibc = sclp.ibc & 0xfff;
803 if (lowest_ibc && proc->ibc) {
804 if (proc->ibc > unblocked_ibc)
805 kvm->arch.model.ibc = unblocked_ibc;
806 else if (proc->ibc < lowest_ibc)
807 kvm->arch.model.ibc = lowest_ibc;
809 kvm->arch.model.ibc = proc->ibc;
811 memcpy(kvm->arch.model.fac_list, proc->fac_list,
812 S390_ARCH_FAC_LIST_SIZE_BYTE);
817 mutex_unlock(&kvm->lock);
821 static int kvm_s390_set_processor_feat(struct kvm *kvm,
822 struct kvm_device_attr *attr)
824 struct kvm_s390_vm_cpu_feat data;
827 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
829 if (!bitmap_subset((unsigned long *) data.feat,
830 kvm_s390_available_cpu_feat,
831 KVM_S390_VM_CPU_FEAT_NR_BITS))
834 mutex_lock(&kvm->lock);
835 if (!atomic_read(&kvm->online_vcpus)) {
836 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
837 KVM_S390_VM_CPU_FEAT_NR_BITS);
840 mutex_unlock(&kvm->lock);
844 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
845 struct kvm_device_attr *attr)
848 * Once supported by kernel + hw, we have to store the subfunctions
849 * in kvm->arch and remember that user space configured them.
854 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
858 switch (attr->attr) {
859 case KVM_S390_VM_CPU_PROCESSOR:
860 ret = kvm_s390_set_processor(kvm, attr);
862 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
863 ret = kvm_s390_set_processor_feat(kvm, attr);
865 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
866 ret = kvm_s390_set_processor_subfunc(kvm, attr);
872 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
874 struct kvm_s390_vm_cpu_processor *proc;
877 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
882 proc->cpuid = kvm->arch.model.cpuid;
883 proc->ibc = kvm->arch.model.ibc;
884 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
885 S390_ARCH_FAC_LIST_SIZE_BYTE);
886 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
893 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
895 struct kvm_s390_vm_cpu_machine *mach;
898 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
903 get_cpu_id((struct cpuid *) &mach->cpuid);
904 mach->ibc = sclp.ibc;
905 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
906 S390_ARCH_FAC_LIST_SIZE_BYTE);
907 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
908 S390_ARCH_FAC_LIST_SIZE_BYTE);
909 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
916 static int kvm_s390_get_processor_feat(struct kvm *kvm,
917 struct kvm_device_attr *attr)
919 struct kvm_s390_vm_cpu_feat data;
921 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
922 KVM_S390_VM_CPU_FEAT_NR_BITS);
923 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
928 static int kvm_s390_get_machine_feat(struct kvm *kvm,
929 struct kvm_device_attr *attr)
931 struct kvm_s390_vm_cpu_feat data;
933 bitmap_copy((unsigned long *) data.feat,
934 kvm_s390_available_cpu_feat,
935 KVM_S390_VM_CPU_FEAT_NR_BITS);
936 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
941 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
942 struct kvm_device_attr *attr)
945 * Once we can actually configure subfunctions (kernel + hw support),
946 * we have to check if they were already set by user space, if so copy
947 * them from kvm->arch.
952 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
953 struct kvm_device_attr *attr)
955 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
956 sizeof(struct kvm_s390_vm_cpu_subfunc)))
960 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
964 switch (attr->attr) {
965 case KVM_S390_VM_CPU_PROCESSOR:
966 ret = kvm_s390_get_processor(kvm, attr);
968 case KVM_S390_VM_CPU_MACHINE:
969 ret = kvm_s390_get_machine(kvm, attr);
971 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
972 ret = kvm_s390_get_processor_feat(kvm, attr);
974 case KVM_S390_VM_CPU_MACHINE_FEAT:
975 ret = kvm_s390_get_machine_feat(kvm, attr);
977 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
978 ret = kvm_s390_get_processor_subfunc(kvm, attr);
980 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
981 ret = kvm_s390_get_machine_subfunc(kvm, attr);
987 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
991 switch (attr->group) {
992 case KVM_S390_VM_MEM_CTRL:
993 ret = kvm_s390_set_mem_control(kvm, attr);
995 case KVM_S390_VM_TOD:
996 ret = kvm_s390_set_tod(kvm, attr);
998 case KVM_S390_VM_CPU_MODEL:
999 ret = kvm_s390_set_cpu_model(kvm, attr);
1001 case KVM_S390_VM_CRYPTO:
1002 ret = kvm_s390_vm_set_crypto(kvm, attr);
1012 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1016 switch (attr->group) {
1017 case KVM_S390_VM_MEM_CTRL:
1018 ret = kvm_s390_get_mem_control(kvm, attr);
1020 case KVM_S390_VM_TOD:
1021 ret = kvm_s390_get_tod(kvm, attr);
1023 case KVM_S390_VM_CPU_MODEL:
1024 ret = kvm_s390_get_cpu_model(kvm, attr);
1034 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1038 switch (attr->group) {
1039 case KVM_S390_VM_MEM_CTRL:
1040 switch (attr->attr) {
1041 case KVM_S390_VM_MEM_ENABLE_CMMA:
1042 case KVM_S390_VM_MEM_CLR_CMMA:
1043 ret = sclp.has_cmma ? 0 : -ENXIO;
1045 case KVM_S390_VM_MEM_LIMIT_SIZE:
1053 case KVM_S390_VM_TOD:
1054 switch (attr->attr) {
1055 case KVM_S390_VM_TOD_LOW:
1056 case KVM_S390_VM_TOD_HIGH:
1064 case KVM_S390_VM_CPU_MODEL:
1065 switch (attr->attr) {
1066 case KVM_S390_VM_CPU_PROCESSOR:
1067 case KVM_S390_VM_CPU_MACHINE:
1068 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1069 case KVM_S390_VM_CPU_MACHINE_FEAT:
1070 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1073 /* configuring subfunctions is not supported yet */
1074 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1080 case KVM_S390_VM_CRYPTO:
1081 switch (attr->attr) {
1082 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1083 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1084 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1085 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1101 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1107 if (args->flags != 0)
1110 /* Is this guest using storage keys? */
1111 if (!mm_use_skey(current->mm))
1112 return KVM_S390_GET_SKEYS_NONE;
1114 /* Enforce sane limit on memory allocation */
1115 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1118 keys = kmalloc_array(args->count, sizeof(uint8_t),
1119 GFP_KERNEL | __GFP_NOWARN);
1121 keys = vmalloc(sizeof(uint8_t) * args->count);
1125 down_read(&current->mm->mmap_sem);
1126 for (i = 0; i < args->count; i++) {
1127 hva = gfn_to_hva(kvm, args->start_gfn + i);
1128 if (kvm_is_error_hva(hva)) {
1133 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1137 up_read(&current->mm->mmap_sem);
1140 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1141 sizeof(uint8_t) * args->count);
1150 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1156 if (args->flags != 0)
1159 /* Enforce sane limit on memory allocation */
1160 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1163 keys = kmalloc_array(args->count, sizeof(uint8_t),
1164 GFP_KERNEL | __GFP_NOWARN);
1166 keys = vmalloc(sizeof(uint8_t) * args->count);
1170 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1171 sizeof(uint8_t) * args->count);
1177 /* Enable storage key handling for the guest */
1178 r = s390_enable_skey();
1182 down_read(&current->mm->mmap_sem);
1183 for (i = 0; i < args->count; i++) {
1184 hva = gfn_to_hva(kvm, args->start_gfn + i);
1185 if (kvm_is_error_hva(hva)) {
1190 /* Lowest order bit is reserved */
1191 if (keys[i] & 0x01) {
1196 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1200 up_read(&current->mm->mmap_sem);
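/*
 * Matching user-space call for the two skeys ioctls above (sketch;
 * "vm_fd" is an assumption; skeydata_addr points at count bytes, one
 * storage key per guest page):
 *
 *	__u8 skeys[256];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 256,
 *		.skeydata_addr = (__u64)skeys,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */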
1206 long kvm_arch_vm_ioctl(struct file *filp,
1207 unsigned int ioctl, unsigned long arg)
1209 struct kvm *kvm = filp->private_data;
1210 void __user *argp = (void __user *)arg;
1211 struct kvm_device_attr attr;
1215 case KVM_S390_INTERRUPT: {
1216 struct kvm_s390_interrupt s390int;
1219 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1221 r = kvm_s390_inject_vm(kvm, &s390int);
1224 case KVM_ENABLE_CAP: {
1225 struct kvm_enable_cap cap;
1227 if (copy_from_user(&cap, argp, sizeof(cap)))
1229 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1232 case KVM_CREATE_IRQCHIP: {
1233 struct kvm_irq_routing_entry routing;
1236 if (kvm->arch.use_irqchip) {
1237 /* Set up dummy routing. */
1238 memset(&routing, 0, sizeof(routing));
1239 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1243 case KVM_SET_DEVICE_ATTR: {
1245 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1247 r = kvm_s390_vm_set_attr(kvm, &attr);
1250 case KVM_GET_DEVICE_ATTR: {
1252 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1254 r = kvm_s390_vm_get_attr(kvm, &attr);
1257 case KVM_HAS_DEVICE_ATTR: {
1259 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1261 r = kvm_s390_vm_has_attr(kvm, &attr);
1264 case KVM_S390_GET_SKEYS: {
1265 struct kvm_s390_skeys args;
1268 if (copy_from_user(&args, argp,
1269 sizeof(struct kvm_s390_skeys)))
1271 r = kvm_s390_get_skeys(kvm, &args);
1274 case KVM_S390_SET_SKEYS: {
1275 struct kvm_s390_skeys args;
1278 if (copy_from_user(&args, argp,
1279 sizeof(struct kvm_s390_skeys)))
1281 r = kvm_s390_set_skeys(kvm, &args);
1291 static int kvm_s390_query_ap_config(u8 *config)
1293 u32 fcn_code = 0x04000000UL;
1296 memset(config, 0, 128);
1300 ".long 0xb2af0000\n" /* PQAP(QCI) */
1306 : "r" (fcn_code), "r" (config)
1307 : "cc", "0", "2", "memory"
1313 static int kvm_s390_apxa_installed(void)
1318 if (test_facility(12)) {
1319 cc = kvm_s390_query_ap_config(config);
1322 pr_err("PQAP(QCI) failed with cc=%d", cc);
1324 return config[0] & 0x40;
1330 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1332 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1334 if (kvm_s390_apxa_installed())
1335 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1337 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1340 static u64 kvm_s390_get_initial_cpuid(void)
1345 cpuid.version = 0xff;
1346 return *((u64 *) &cpuid);
1349 static void kvm_s390_crypto_init(struct kvm *kvm)
1351 if (!test_kvm_facility(kvm, 76))
1354 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1355 kvm_s390_set_crycb_format(kvm);
1357 /* Enable AES/DEA protected key functions by default */
1358 kvm->arch.crypto.aes_kw = 1;
1359 kvm->arch.crypto.dea_kw = 1;
1360 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1361 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1362 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1363 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1366 static void sca_dispose(struct kvm *kvm)
1368 if (kvm->arch.use_esca)
1369 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1371 free_page((unsigned long)(kvm->arch.sca));
1372 kvm->arch.sca = NULL;
1375 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1377 gfp_t alloc_flags = GFP_KERNEL;
1379 char debug_name[16];
1380 static unsigned long sca_offset;
1383 #ifdef CONFIG_KVM_S390_UCONTROL
1384 if (type & ~KVM_VM_S390_UCONTROL)
1386 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1393 rc = s390_enable_sie();
1399 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1401 kvm->arch.use_esca = 0; /* start with basic SCA */
1402 if (!sclp.has_64bscao)
1403 alloc_flags |= GFP_DMA;
1404 rwlock_init(&kvm->arch.sca_lock);
1405 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1408 spin_lock(&kvm_lock);
1410 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1412 kvm->arch.sca = (struct bsca_block *)
1413 ((char *) kvm->arch.sca + sca_offset);
1414 spin_unlock(&kvm_lock);
1416 sprintf(debug_name, "kvm-%u", current->pid);
1418 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1422 kvm->arch.sie_page2 =
1423 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1424 if (!kvm->arch.sie_page2)
1427 /* Populate the facility mask initially. */
1428 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1429 S390_ARCH_FAC_LIST_SIZE_BYTE);
1430 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1431 if (i < kvm_s390_fac_list_mask_size())
1432 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1434 kvm->arch.model.fac_mask[i] = 0UL;
1437 /* Populate the facility list initially. */
1438 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1439 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1440 S390_ARCH_FAC_LIST_SIZE_BYTE);
1442 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1443 set_kvm_facility(kvm->arch.model.fac_list, 74);
1445 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1446 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1448 kvm_s390_crypto_init(kvm);
1450 spin_lock_init(&kvm->arch.float_int.lock);
1451 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1452 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1453 init_waitqueue_head(&kvm->arch.ipte_wq);
1454 mutex_init(&kvm->arch.ipte_mutex);
1456 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1457 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1459 if (type & KVM_VM_S390_UCONTROL) {
1460 kvm->arch.gmap = NULL;
1461 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1463 if (sclp.hamax == U64_MAX)
1464 kvm->arch.mem_limit = TASK_MAX_SIZE;
1466 kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1468 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1469 if (!kvm->arch.gmap)
1471 kvm->arch.gmap->private = kvm;
1472 kvm->arch.gmap->pfault_enabled = 0;
1475 kvm->arch.css_support = 0;
1476 kvm->arch.use_irqchip = 0;
1477 kvm->arch.epoch = 0;
1479 spin_lock_init(&kvm->arch.start_stop_lock);
1480 kvm_s390_vsie_init(kvm);
1481 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1485 free_page((unsigned long)kvm->arch.sie_page2);
1486 debug_unregister(kvm->arch.dbf);
1488 KVM_EVENT(3, "creation of vm failed: %d", rc);
1492 bool kvm_arch_has_vcpu_debugfs(void)
1497 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1502 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1504 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1505 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1506 kvm_s390_clear_local_irqs(vcpu);
1507 kvm_clear_async_pf_completion_queue(vcpu);
1508 if (!kvm_is_ucontrol(vcpu->kvm))
1511 if (kvm_is_ucontrol(vcpu->kvm))
1512 gmap_remove(vcpu->arch.gmap);
1514 if (vcpu->kvm->arch.use_cmma)
1515 kvm_s390_vcpu_unsetup_cmma(vcpu);
1516 free_page((unsigned long)(vcpu->arch.sie_block));
1518 kvm_vcpu_uninit(vcpu);
1519 kmem_cache_free(kvm_vcpu_cache, vcpu);
1522 static void kvm_free_vcpus(struct kvm *kvm)
1525 struct kvm_vcpu *vcpu;
1527 kvm_for_each_vcpu(i, vcpu, kvm)
1528 kvm_arch_vcpu_destroy(vcpu);
1530 mutex_lock(&kvm->lock);
1531 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1532 kvm->vcpus[i] = NULL;
1534 atomic_set(&kvm->online_vcpus, 0);
1535 mutex_unlock(&kvm->lock);
1538 void kvm_arch_destroy_vm(struct kvm *kvm)
1540 kvm_free_vcpus(kvm);
1542 debug_unregister(kvm->arch.dbf);
1543 free_page((unsigned long)kvm->arch.sie_page2);
1544 if (!kvm_is_ucontrol(kvm))
1545 gmap_remove(kvm->arch.gmap);
1546 kvm_s390_destroy_adapters(kvm);
1547 kvm_s390_clear_float_irqs(kvm);
1548 kvm_s390_vsie_destroy(kvm);
1549 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1552 /* Section: vcpu related */
1553 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1555 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1556 if (!vcpu->arch.gmap)
1558 vcpu->arch.gmap->private = vcpu->kvm;
1563 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1565 if (!kvm_s390_use_sca_entries())
1567 read_lock(&vcpu->kvm->arch.sca_lock);
1568 if (vcpu->kvm->arch.use_esca) {
1569 struct esca_block *sca = vcpu->kvm->arch.sca;
1571 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1572 sca->cpu[vcpu->vcpu_id].sda = 0;
1574 struct bsca_block *sca = vcpu->kvm->arch.sca;
1576 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1577 sca->cpu[vcpu->vcpu_id].sda = 0;
1579 read_unlock(&vcpu->kvm->arch.sca_lock);
1582 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1584 if (!kvm_s390_use_sca_entries()) {
1585 struct bsca_block *sca = vcpu->kvm->arch.sca;
1587 /* we still need the basic sca for the ipte control */
1588 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1589 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1591 read_lock(&vcpu->kvm->arch.sca_lock);
1592 if (vcpu->kvm->arch.use_esca) {
1593 struct esca_block *sca = vcpu->kvm->arch.sca;
1595 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1596 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1597 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1598 vcpu->arch.sie_block->ecb2 |= 0x04U;
1599 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1601 struct bsca_block *sca = vcpu->kvm->arch.sca;
1603 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1604 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1605 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1606 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1608 read_unlock(&vcpu->kvm->arch.sca_lock);
1611 /* Basic SCA to Extended SCA data copy routines */
1612 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1615 d->sigp_ctrl.c = s->sigp_ctrl.c;
1616 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1619 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1623 d->ipte_control = s->ipte_control;
1625 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1626 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1629 static int sca_switch_to_extended(struct kvm *kvm)
1631 struct bsca_block *old_sca = kvm->arch.sca;
1632 struct esca_block *new_sca;
1633 struct kvm_vcpu *vcpu;
1634 unsigned int vcpu_idx;
1637 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1641 scaoh = (u32)((u64)(new_sca) >> 32);
1642 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1644 kvm_s390_vcpu_block_all(kvm);
1645 write_lock(&kvm->arch.sca_lock);
1647 sca_copy_b_to_e(new_sca, old_sca);
1649 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1650 vcpu->arch.sie_block->scaoh = scaoh;
1651 vcpu->arch.sie_block->scaol = scaol;
1652 vcpu->arch.sie_block->ecb2 |= 0x04U;
1654 kvm->arch.sca = new_sca;
1655 kvm->arch.use_esca = 1;
1657 write_unlock(&kvm->arch.sca_lock);
1658 kvm_s390_vcpu_unblock_all(kvm);
1660 free_page((unsigned long)old_sca);
1662 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1663 old_sca, kvm->arch.sca);
1667 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1671 if (!kvm_s390_use_sca_entries()) {
1672 if (id < KVM_MAX_VCPUS)
1676 if (id < KVM_S390_BSCA_CPU_SLOTS)
1678 if (!sclp.has_esca || !sclp.has_64bscao)
1681 mutex_lock(&kvm->lock);
1682 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1683 mutex_unlock(&kvm->lock);
1685 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1688 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1690 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1691 kvm_clear_async_pf_completion_queue(vcpu);
1692 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1698 kvm_s390_set_prefix(vcpu, 0);
1699 if (test_kvm_facility(vcpu->kvm, 64))
1700 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1701 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1702 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1705 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1707 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1709 if (kvm_is_ucontrol(vcpu->kvm))
1710 return __kvm_ucontrol_vcpu_init(vcpu);
1715 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1716 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1718 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1719 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1720 vcpu->arch.cputm_start = get_tod_clock_fast();
1721 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1724 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1725 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1727 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1728 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1729 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1730 vcpu->arch.cputm_start = 0;
1731 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1734 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1735 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1737 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1738 vcpu->arch.cputm_enabled = true;
1739 __start_cpu_timer_accounting(vcpu);
1742 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1743 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1745 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1746 __stop_cpu_timer_accounting(vcpu);
1747 vcpu->arch.cputm_enabled = false;
1750 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1752 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1753 __enable_cpu_timer_accounting(vcpu);
1757 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1759 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1760 __disable_cpu_timer_accounting(vcpu);
1764 /* set the cpu timer - may only be called from the VCPU thread itself */
1765 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1767 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1768 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1769 if (vcpu->arch.cputm_enabled)
1770 vcpu->arch.cputm_start = get_tod_clock_fast();
1771 vcpu->arch.sie_block->cputm = cputm;
1772 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1776 /* update and get the cpu timer - can also be called from other VCPU threads */
1777 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1782 if (unlikely(!vcpu->arch.cputm_enabled))
1783 return vcpu->arch.sie_block->cputm;
1785 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1787 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1789 * If the writer would ever execute a read in the critical
1790 * section, e.g. in irq context, we have a deadlock.
1792 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1793 value = vcpu->arch.sie_block->cputm;
1794 /* if cputm_start is 0, accounting is being started/stopped */
1795 if (likely(vcpu->arch.cputm_start))
1796 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1797 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1802 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1804 /* Save host register state */
1806 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1807 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1810 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1812 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1813 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1814 if (test_fp_ctl(current->thread.fpu.fpc))
1815 /* User space provided an invalid FPC, let's clear it */
1816 current->thread.fpu.fpc = 0;
1818 save_access_regs(vcpu->arch.host_acrs);
1819 restore_access_regs(vcpu->run->s.regs.acrs);
1820 gmap_enable(vcpu->arch.enabled_gmap);
1821 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1822 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1823 __start_cpu_timer_accounting(vcpu);
1827 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1830 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1831 __stop_cpu_timer_accounting(vcpu);
1832 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1833 vcpu->arch.enabled_gmap = gmap_get_enabled();
1834 gmap_disable(vcpu->arch.enabled_gmap);
1836 /* Save guest register state */
1838 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1840 /* Restore host register state */
1841 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1842 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1844 save_access_regs(vcpu->run->s.regs.acrs);
1845 restore_access_regs(vcpu->arch.host_acrs);
1848 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1850 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1851 vcpu->arch.sie_block->gpsw.mask = 0UL;
1852 vcpu->arch.sie_block->gpsw.addr = 0UL;
1853 kvm_s390_set_prefix(vcpu, 0);
1854 kvm_s390_set_cpu_timer(vcpu, 0);
1855 vcpu->arch.sie_block->ckc = 0UL;
1856 vcpu->arch.sie_block->todpr = 0;
1857 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1858 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1859 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1860 /* make sure the new fpc will be lazily loaded */
1862 current->thread.fpu.fpc = 0;
1863 vcpu->arch.sie_block->gbea = 1;
1864 vcpu->arch.sie_block->pp = 0;
1865 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1866 kvm_clear_async_pf_completion_queue(vcpu);
1867 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1868 kvm_s390_vcpu_stop(vcpu);
1869 kvm_s390_clear_local_irqs(vcpu);
1872 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1874 mutex_lock(&vcpu->kvm->lock);
1876 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1878 mutex_unlock(&vcpu->kvm->lock);
1879 if (!kvm_is_ucontrol(vcpu->kvm)) {
1880 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1883 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1884 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1885 /* make vcpu_load load the right gmap on the first trigger */
1886 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1889 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1891 if (!test_kvm_facility(vcpu->kvm, 76))
1894 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1896 if (vcpu->kvm->arch.crypto.aes_kw)
1897 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1898 if (vcpu->kvm->arch.crypto.dea_kw)
1899 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1901 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1904 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1906 free_page(vcpu->arch.sie_block->cbrlo);
1907 vcpu->arch.sie_block->cbrlo = 0;
1910 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1912 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1913 if (!vcpu->arch.sie_block->cbrlo)
1916 vcpu->arch.sie_block->ecb2 |= 0x80;
1917 vcpu->arch.sie_block->ecb2 &= ~0x08;
1921 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1923 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1925 vcpu->arch.sie_block->ibc = model->ibc;
1926 if (test_kvm_facility(vcpu->kvm, 7))
1927 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1930 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1934 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1938 if (test_kvm_facility(vcpu->kvm, 78))
1939 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1940 else if (test_kvm_facility(vcpu->kvm, 8))
1941 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1943 kvm_s390_vcpu_setup_model(vcpu);
1945 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1946 if (MACHINE_HAS_ESOP)
1947 vcpu->arch.sie_block->ecb |= 0x02;
1948 if (test_kvm_facility(vcpu->kvm, 9))
1949 vcpu->arch.sie_block->ecb |= 0x04;
1950 if (test_kvm_facility(vcpu->kvm, 73))
1951 vcpu->arch.sie_block->ecb |= 0x10;
1953 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1954 vcpu->arch.sie_block->ecb2 |= 0x08;
1955 vcpu->arch.sie_block->eca = 0x1002000U;
1957 vcpu->arch.sie_block->eca |= 0x80000000U;
1959 vcpu->arch.sie_block->eca |= 0x40000000U;
1961 vcpu->arch.sie_block->eca |= 1;
1962 if (sclp.has_sigpif)
1963 vcpu->arch.sie_block->eca |= 0x10000000U;
1964 if (test_kvm_facility(vcpu->kvm, 129)) {
1965 vcpu->arch.sie_block->eca |= 0x00020000;
1966 vcpu->arch.sie_block->ecd |= 0x20000000;
1968 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1969 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1971 if (vcpu->kvm->arch.use_cmma) {
1972 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1976 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1977 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1979 kvm_s390_vcpu_crypto_setup(vcpu);
1984 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1987 struct kvm_vcpu *vcpu;
1988 struct sie_page *sie_page;
1991 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1996 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2000 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2004 vcpu->arch.sie_block = &sie_page->sie_block;
2005 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2007 /* the real guest size will always be smaller than msl */
2008 vcpu->arch.sie_block->mso = 0;
2009 vcpu->arch.sie_block->msl = sclp.hamax;
2011 vcpu->arch.sie_block->icpua = id;
2012 spin_lock_init(&vcpu->arch.local_int.lock);
2013 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2014 vcpu->arch.local_int.wq = &vcpu->wq;
2015 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2016 seqcount_init(&vcpu->arch.cputm_seqcount);
2018 rc = kvm_vcpu_init(vcpu, kvm, id);
2020 goto out_free_sie_block;
2021 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2022 vcpu->arch.sie_block);
2023 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2027 free_page((unsigned long)(vcpu->arch.sie_block));
2029 kmem_cache_free(kvm_vcpu_cache, vcpu);
2034 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2036 return kvm_s390_vcpu_has_irq(vcpu, 0);
2039 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2041 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2045 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2047 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2050 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2052 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2056 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2058 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2062 * Kick a guest cpu out of SIE and wait until SIE is not running.
2063 * If the CPU is not running (e.g. waiting as idle) the function will
2064 * return immediately. */
2065 void exit_sie(struct kvm_vcpu *vcpu)
2067 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2068 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2072 /* Kick a guest cpu out of SIE to process a request synchronously */
2073 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2075 kvm_make_request(req, vcpu);
2076 kvm_s390_vcpu_request(vcpu);
2079 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2082 struct kvm *kvm = gmap->private;
2083 struct kvm_vcpu *vcpu;
2084 unsigned long prefix;
2087 if (gmap_is_shadow(gmap))
2089 if (start >= 1UL << 31)
2090 /* We are only interested in prefix pages */
2092 kvm_for_each_vcpu(i, vcpu, kvm) {
2093 /* match against both prefix pages */
2094 prefix = kvm_s390_get_prefix(vcpu);
2095 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2096 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2098 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2103 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2105 /* kvm common code refers to this, but never calls it */
2110 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2111 struct kvm_one_reg *reg)
2116 case KVM_REG_S390_TODPR:
2117 r = put_user(vcpu->arch.sie_block->todpr,
2118 (u32 __user *)reg->addr);
2120 case KVM_REG_S390_EPOCHDIFF:
2121 r = put_user(vcpu->arch.sie_block->epoch,
2122 (u64 __user *)reg->addr);
2124 case KVM_REG_S390_CPU_TIMER:
2125 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2126 (u64 __user *)reg->addr);
2128 case KVM_REG_S390_CLOCK_COMP:
2129 r = put_user(vcpu->arch.sie_block->ckc,
2130 (u64 __user *)reg->addr);
2132 case KVM_REG_S390_PFTOKEN:
2133 r = put_user(vcpu->arch.pfault_token,
2134 (u64 __user *)reg->addr);
2136 case KVM_REG_S390_PFCOMPARE:
2137 r = put_user(vcpu->arch.pfault_compare,
2138 (u64 __user *)reg->addr);
2140 case KVM_REG_S390_PFSELECT:
2141 r = put_user(vcpu->arch.pfault_select,
2142 (u64 __user *)reg->addr);
2144 case KVM_REG_S390_PP:
2145 r = put_user(vcpu->arch.sie_block->pp,
2146 (u64 __user *)reg->addr);
2148 case KVM_REG_S390_GBEA:
2149 r = put_user(vcpu->arch.sie_block->gbea,
2150 (u64 __user *)reg->addr);
2159 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2160 struct kvm_one_reg *reg)
2166 case KVM_REG_S390_TODPR:
2167 r = get_user(vcpu->arch.sie_block->todpr,
2168 (u32 __user *)reg->addr);
2170 case KVM_REG_S390_EPOCHDIFF:
2171 r = get_user(vcpu->arch.sie_block->epoch,
2172 (u64 __user *)reg->addr);
2174 case KVM_REG_S390_CPU_TIMER:
2175 r = get_user(val, (u64 __user *)reg->addr);
2177 kvm_s390_set_cpu_timer(vcpu, val);
2179 case KVM_REG_S390_CLOCK_COMP:
2180 r = get_user(vcpu->arch.sie_block->ckc,
2181 (u64 __user *)reg->addr);
2183 case KVM_REG_S390_PFTOKEN:
2184 r = get_user(vcpu->arch.pfault_token,
2185 (u64 __user *)reg->addr);
2186 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2187 kvm_clear_async_pf_completion_queue(vcpu);
2189 case KVM_REG_S390_PFCOMPARE:
2190 r = get_user(vcpu->arch.pfault_compare,
2191 (u64 __user *)reg->addr);
2193 case KVM_REG_S390_PFSELECT:
2194 r = get_user(vcpu->arch.pfault_select,
2195 (u64 __user *)reg->addr);
2197 case KVM_REG_S390_PP:
2198 r = get_user(vcpu->arch.sie_block->pp,
2199 (u64 __user *)reg->addr);
2201 case KVM_REG_S390_GBEA:
2202 r = get_user(vcpu->arch.sie_block->gbea,
2203 (u64 __user *)reg->addr);
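/*
 * User space reads and writes these registers one at a time (sketch;
 * "vcpu_fd" is an assumption):
 *
 *	__u64 cputm = 0;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
 */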
2212 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2214 kvm_s390_vcpu_initial_reset(vcpu);
2218 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2220 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2224 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2226 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2230 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2231 struct kvm_sregs *sregs)
2233 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2234 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2235 restore_access_regs(vcpu->run->s.regs.acrs);
2239 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2240 struct kvm_sregs *sregs)
2242 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2243 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2247 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2249 /* make sure the new values will be lazily loaded */
2251 if (test_fp_ctl(fpu->fpc))
2253 current->thread.fpu.fpc = fpu->fpc;
2255 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
2257 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
2261 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2263 /* make sure we have the latest values */
2266 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
2268 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
2269 fpu->fpc = current->thread.fpu.fpc;
2273 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2277 if (!is_vcpu_stopped(vcpu))
2280 vcpu->run->psw_mask = psw.mask;
2281 vcpu->run->psw_addr = psw.addr;
2286 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2287 struct kvm_translation *tr)
2289 return -EINVAL; /* not implemented yet */
2292 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2293 KVM_GUESTDBG_USE_HW_BP | \
2294 KVM_GUESTDBG_ENABLE)
2296 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2297 struct kvm_guest_debug *dbg)
2301 vcpu->guest_debug = 0;
2302 kvm_s390_clear_bp_data(vcpu);
2304 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2306 if (!sclp.has_gpere)
2309 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2310 vcpu->guest_debug = dbg->control;
2311 /* enforce guest PER */
2312 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2314 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2315 rc = kvm_s390_import_bp_data(vcpu, dbg);
2317 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2318 vcpu->arch.guestdbg.last_bp = 0;
2322 vcpu->guest_debug = 0;
2323 kvm_s390_clear_bp_data(vcpu);
2324 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
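/*
 * Enabling single-stepping from user space (sketch; "vcpu_fd" is an
 * assumption):
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */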
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
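
/*
 * Request handling: vcpu->requests carries asynchronous work (TLB flush,
 * IBS enable/disable, re-arming the ipte notifier for the prefix page, ...)
 * that must be applied on the vcpu thread before (re-)entering SIE. The
 * helpers below are invoked from vcpu_pre_run() further down in this file.
 */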
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;

		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}
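
/*
 * Set the guest TOD clock by recomputing the epoch (requested guest TOD
 * minus the host TOD). All vcpus are blocked while their SIE epoch fields
 * are updated, so no vcpu can run with a half-updated clock.
 */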
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
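
/*
 * Pfault token injection: a "start" token is delivered to the vcpu as a
 * PFAULT_INIT external interrupt, while completion is signalled to the VM
 * as a floating PFAULT_DONE interrupt. The async_pf callbacks below map
 * the generic async page fault events onto these two cases.
 */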
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to clean up
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
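
/*
 * vcpu_pre_run() does the host-side work required before each SIE entry:
 * async pfault completion housekeeping, saving gprs 14/15 into the SIE
 * block, machine check handling, interrupt delivery and processing of the
 * requests handled above. A non-zero return value cancels the SIE entry.
 */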
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
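
/*
 * vcpu_post_run() evaluates the SIE exit: intercepts are dispatched to
 * kvm_handle_sie_intercept() and, if unhandled, reflected to userspace as
 * KVM_EXIT_S390_SIEIC; host page faults either arm an async pfault or are
 * resolved synchronously; anything left is treated as a fault taken while
 * inside SIE (see vcpu_post_run_fault_in_sie() above).
 */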
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
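
/*
 * Inner run loop: alternate between host-side preparation and SIE entry
 * until an exit condition is hit (pending signal, guest debug exit, or an
 * error/userspace-exit code from the pre/post handlers). kvm->srcu is held
 * whenever we are not inside SIE so that memslot accesses stay protected.
 */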
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in the fault handler, there
		 * should be no uaccess between guest_enter and guest_exit.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
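
/*
 * sync_regs()/store_regs() implement the kvm_run register synchronization:
 * on entry, registers flagged in kvm_dirty_regs are copied from the kvm_run
 * area into the vcpu/SIE block state; on exit, the current state is copied
 * back so userspace always sees up-to-date values.
 */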
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64)) {
		struct runtime_instr_cb *riccb =
			(struct runtime_instr_cb *) &kvm_run->s.regs.riccb;

		if (riccb->valid)
			vcpu->arch.sie_block->ecb3 |= 0x01;
	}

	kvm_run->kvm_dirty_regs = 0;
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}
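
/*
 * Top-level KVM_RUN handler: sets up the signal mask and cpu state,
 * synchronizes registers, runs the vcpu via __vcpu_run() and translates
 * the result into a kvm_run exit reason for userspace.
 */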
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0x1200;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
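
/*
 * IBS handling for the START/STOP state machine. As the comments in
 * kvm_s390_vcpu_start()/stop() below suggest, IBS is used to speed up a
 * guest with exactly one running vcpu: it is enabled when only one vcpu is
 * left running and disabled as soon as a second one starts. The resulting
 * requests are processed in kvm_s390_handle_requests() above.
 */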
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
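
/*
 * KVM_S390_MEM_OP backend: reads or writes up to MEM_OP_MAX_SIZE bytes of
 * guest logical memory through a temporary kernel buffer, or merely checks
 * the access when KVM_S390_MEMOP_F_CHECK_ONLY is set. On an access
 * exception (r > 0), the pending program interrupt can optionally be
 * injected into the guest via KVM_S390_MEMOP_F_INJECT_EXCEPTION.
 */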
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
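
/* Dispatcher for the vcpu-scope ioctls of the s390 KVM implementation. */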
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
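
/*
 * sclp.hmfai encodes, in 2-bit fields, which parts of each facility-list
 * doubleword are usable by non-hypervisor ("nonhyp") programs; the helper
 * below turns field i into a bit mask that kvm_s390_init() ANDs into the
 * KVM facility list at module init time.
 */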
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c,
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");