/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */

#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
(KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
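/* One debugfs entry per vcpu statistics counter, referenced via VCPU_STAT. */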
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "userspace_handled", VCPU_STAT(exit_userspace) },
{ "exit_null", VCPU_STAT(exit_null) },
{ "exit_validity", VCPU_STAT(exit_validity) },
{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
{ "exit_external_request", VCPU_STAT(exit_external_request) },
{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
{ "exit_instruction", VCPU_STAT(exit_instruction) },
{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
{ "instruction_spx", VCPU_STAT(instruction_spx) },
{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
{ "instruction_stap", VCPU_STAT(instruction_stap) },
{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
{ "instruction_essa", VCPU_STAT(instruction_essa) },
{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
{ "diagnose_10", VCPU_STAT(diagnose_10) },
{ "diagnose_44", VCPU_STAT(diagnose_44) },
{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
{ "diagnose_258", VCPU_STAT(diagnose_258) },
{ "diagnose_308", VCPU_STAT(diagnose_308) },
{ "diagnose_500", VCPU_STAT(diagnose_500) },
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
0xffe6fffbfcfdfc40UL,
0x005e800000000000UL,

unsigned long kvm_s390_fac_list_mask_size(void)
BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
return ARRAY_SIZE(kvm_s390_fac_list_mask);

static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
/* every s390 is virtualization enabled ;-) */

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
struct kvm_vcpu *vcpu;
unsigned long long *delta = v;

list_for_each_entry(kvm, &vm_list, vm_list) {
kvm->arch.epoch -= *delta;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.sie_block->epoch -= *delta;

static struct notifier_block kvm_clock_notifier = {
.notifier_call = kvm_clock_sync,

int kvm_arch_hardware_setup(void)
gmap_notifier.notifier_call = kvm_gmap_notifier;
gmap_register_ipte_notifier(&gmap_notifier);
atomic_notifier_chain_register(&s390_epoch_delta_notifier,
&kvm_clock_notifier);

void kvm_arch_hardware_unsetup(void)
gmap_unregister_ipte_notifier(&gmap_notifier);
atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
&kvm_clock_notifier);

int kvm_arch_init(void *opaque)
kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));

if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
debug_unregister(kvm_s390_dbf);

/* Register floating interrupt controller interface. */
return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
void kvm_arch_exit(void)
debug_unregister(kvm_s390_dbf);

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
if (ioctl == KVM_S390_ENABLE_SIE)
return s390_enable_sie();

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_PSW:
case KVM_CAP_S390_GMAP:
case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
case KVM_CAP_S390_UCONTROL:
case KVM_CAP_ASYNC_PF:
case KVM_CAP_SYNC_REGS:
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_S390_CSS_SUPPORT:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_S390_IRQCHIP:
case KVM_CAP_VM_ATTRIBUTES:
case KVM_CAP_MP_STATE:
case KVM_CAP_S390_INJECT_IRQ:
case KVM_CAP_S390_USER_SIGP:
case KVM_CAP_S390_USER_STSI:
case KVM_CAP_S390_SKEYS:
case KVM_CAP_S390_IRQ_STATE:
case KVM_CAP_S390_MEM_OP:
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
case KVM_CAP_NR_MEMSLOTS:
r = KVM_USER_MEM_SLOTS;
case KVM_CAP_S390_COW:
r = MACHINE_HAS_ESOP;
case KVM_CAP_S390_VECTOR_REGISTERS:
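/*
 * Transfer the dirty state of all pages in a memslot from the gmap to
 * KVM's dirty bitmap. Runs under mmap_sem so the host mapping cannot
 * change while the gmap dirty bits are tested and cleared.
 */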
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
struct kvm_memory_slot *memslot)
gfn_t cur_gfn, last_gfn;
unsigned long address;
struct gmap *gmap = kvm->arch.gmap;

down_read(&gmap->mm->mmap_sem);
/* Loop over all guest pages */
last_gfn = memslot->base_gfn + memslot->npages;
for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
address = gfn_to_hva_memslot(memslot, cur_gfn);

if (gmap_test_and_clear_dirty(address, gmap))
mark_page_dirty(kvm, cur_gfn);

up_read(&gmap->mm->mmap_sem);

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log)
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;

mutex_lock(&kvm->slots_lock);

if (log->slot >= KVM_USER_MEM_SLOTS)

slots = kvm_memslots(kvm);
memslot = id_to_memslot(slots, log->slot);

if (!memslot->dirty_bitmap)

kvm_s390_sync_dirty_log(kvm, memslot);
r = kvm_get_dirty_log(kvm, log, &is_dirty);

/* Clear the dirty log */
n = kvm_dirty_bitmap_bytes(memslot);
memset(memslot->dirty_bitmap, 0, n);

mutex_unlock(&kvm->slots_lock);
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
case KVM_CAP_S390_IRQCHIP:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
kvm->arch.use_irqchip = 1;
case KVM_CAP_S390_USER_SIGP:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
kvm->arch.user_sigp = 1;
case KVM_CAP_S390_VECTOR_REGISTERS:
mutex_lock(&kvm->lock);
if (atomic_read(&kvm->online_vcpus)) {
} else if (MACHINE_HAS_VX) {
set_kvm_facility(kvm->arch.model.fac->mask, 129);
set_kvm_facility(kvm->arch.model.fac->list, 129);
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
r ? "(not available)" : "(success)");
case KVM_CAP_S390_USER_STSI:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
kvm->arch.user_stsi = 1;
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_MEM_LIMIT_SIZE:
VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
kvm->arch.gmap->asce_end);
if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_MEM_ENABLE_CMMA:
/* enable CMMA only for z10 and later (EDAT_1) */
if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)

VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
mutex_lock(&kvm->lock);
if (atomic_read(&kvm->online_vcpus) == 0) {
kvm->arch.use_cmma = 1;
mutex_unlock(&kvm->lock);
case KVM_S390_VM_MEM_CLR_CMMA:
if (!kvm->arch.use_cmma)

VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
mutex_lock(&kvm->lock);
idx = srcu_read_lock(&kvm->srcu);
s390_reset_cmma(kvm->arch.gmap->mm);
srcu_read_unlock(&kvm->srcu, idx);
mutex_unlock(&kvm->lock);
case KVM_S390_VM_MEM_LIMIT_SIZE: {
unsigned long new_limit;

if (kvm_is_ucontrol(kvm))
if (get_user(new_limit, (u64 __user *)attr->addr))
if (new_limit > kvm->arch.gmap->asce_end)

mutex_lock(&kvm->lock);
if (atomic_read(&kvm->online_vcpus) == 0) {
/* gmap_alloc will round the limit up */
struct gmap *new = gmap_alloc(current->mm, new_limit);

gmap_free(kvm->arch.gmap);
kvm->arch.gmap = new;
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
struct kvm_vcpu *vcpu;

if (!test_kvm_facility(kvm, 76))

mutex_lock(&kvm->lock);
switch (attr->attr) {
case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
kvm->arch.crypto.crycb->aes_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
kvm->arch.crypto.aes_kw = 1;
VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
kvm->arch.crypto.crycb->dea_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
kvm->arch.crypto.dea_kw = 1;
VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
kvm->arch.crypto.aes_kw = 0;
memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
kvm->arch.crypto.dea_kw = 0;
memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
mutex_unlock(&kvm->lock);

kvm_for_each_vcpu(i, vcpu, kvm) {
kvm_s390_vcpu_crypto_setup(vcpu);

mutex_unlock(&kvm->lock);
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
if (copy_from_user(&gtod_high, (void __user *)attr->addr,

VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))

kvm_s390_set_tod_clock(kvm, gtod);
VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_TOD_HIGH:
ret = kvm_s390_set_tod_high(kvm, attr);
case KVM_S390_VM_TOD_LOW:
ret = kvm_s390_set_tod_low(kvm, attr);

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
if (copy_to_user((void __user *)attr->addr, &gtod_high,
VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
gtod = kvm_s390_get_tod_clock_fast(kvm);
if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_TOD_HIGH:
ret = kvm_s390_get_tod_high(kvm, attr);
case KVM_S390_VM_TOD_LOW:
ret = kvm_s390_get_tod_low(kvm, attr);
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
struct kvm_s390_vm_cpu_processor *proc;

mutex_lock(&kvm->lock);
if (atomic_read(&kvm->online_vcpus)) {

proc = kzalloc(sizeof(*proc), GFP_KERNEL);

if (!copy_from_user(proc, (void __user *)attr->addr,
memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
sizeof(struct cpuid));
kvm->arch.model.ibc = proc->ibc;
memcpy(kvm->arch.model.fac->list, proc->fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);

mutex_unlock(&kvm->lock);

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_CPU_PROCESSOR:
ret = kvm_s390_set_processor(kvm, attr);

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
struct kvm_s390_vm_cpu_processor *proc;

proc = kzalloc(sizeof(*proc), GFP_KERNEL);

memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
proc->ibc = kvm->arch.model.ibc;
memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
struct kvm_s390_vm_cpu_machine *mach;

mach = kzalloc(sizeof(*mach), GFP_KERNEL);

get_cpu_id((struct cpuid *) &mach->cpuid);
mach->ibc = sclp.ibc;
memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_CPU_PROCESSOR:
ret = kvm_s390_get_processor(kvm, attr);
case KVM_S390_VM_CPU_MACHINE:
ret = kvm_s390_get_machine(kvm, attr);
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->group) {
case KVM_S390_VM_MEM_CTRL:
ret = kvm_s390_set_mem_control(kvm, attr);
case KVM_S390_VM_TOD:
ret = kvm_s390_set_tod(kvm, attr);
case KVM_S390_VM_CPU_MODEL:
ret = kvm_s390_set_cpu_model(kvm, attr);
case KVM_S390_VM_CRYPTO:
ret = kvm_s390_vm_set_crypto(kvm, attr);

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->group) {
case KVM_S390_VM_MEM_CTRL:
ret = kvm_s390_get_mem_control(kvm, attr);
case KVM_S390_VM_TOD:
ret = kvm_s390_get_tod(kvm, attr);
case KVM_S390_VM_CPU_MODEL:
ret = kvm_s390_get_cpu_model(kvm, attr);

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->group) {
case KVM_S390_VM_MEM_CTRL:
switch (attr->attr) {
case KVM_S390_VM_MEM_ENABLE_CMMA:
case KVM_S390_VM_MEM_CLR_CMMA:
case KVM_S390_VM_MEM_LIMIT_SIZE:
case KVM_S390_VM_TOD:
switch (attr->attr) {
case KVM_S390_VM_TOD_LOW:
case KVM_S390_VM_TOD_HIGH:
case KVM_S390_VM_CPU_MODEL:
switch (attr->attr) {
case KVM_S390_VM_CPU_PROCESSOR:
case KVM_S390_VM_CPU_MACHINE:
case KVM_S390_VM_CRYPTO:
switch (attr->attr) {
case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
unsigned long curkey;

if (args->flags != 0)

/* Is this guest using storage keys? */
if (!mm_use_skey(current->mm))
return KVM_S390_GET_SKEYS_NONE;

/* Enforce sane limit on memory allocation */
if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)

keys = kmalloc_array(args->count, sizeof(uint8_t),
GFP_KERNEL | __GFP_NOWARN);
keys = vmalloc(sizeof(uint8_t) * args->count);

for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
if (kvm_is_error_hva(hva)) {

curkey = get_guest_storage_key(current->mm, hva);
if (IS_ERR_VALUE(curkey)) {

r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
sizeof(uint8_t) * args->count);

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
if (args->flags != 0)

/* Enforce sane limit on memory allocation */
if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)

keys = kmalloc_array(args->count, sizeof(uint8_t),
GFP_KERNEL | __GFP_NOWARN);
keys = vmalloc(sizeof(uint8_t) * args->count);

r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
sizeof(uint8_t) * args->count);

/* Enable storage key handling for the guest */
r = s390_enable_skey();

for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
if (kvm_is_error_hva(hva)) {

/* Lowest order bit is reserved */
if (keys[i] & 0x01) {

r = set_guest_storage_key(current->mm, hva,
(unsigned long)keys[i], 0);
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
struct kvm_device_attr attr;

case KVM_S390_INTERRUPT: {
struct kvm_s390_interrupt s390int;

if (copy_from_user(&s390int, argp, sizeof(s390int)))
r = kvm_s390_inject_vm(kvm, &s390int);
case KVM_ENABLE_CAP: {
struct kvm_enable_cap cap;

if (copy_from_user(&cap, argp, sizeof(cap)))
r = kvm_vm_ioctl_enable_cap(kvm, &cap);
case KVM_CREATE_IRQCHIP: {
struct kvm_irq_routing_entry routing;

if (kvm->arch.use_irqchip) {
/* Set up dummy routing. */
memset(&routing, 0, sizeof(routing));
r = kvm_set_irq_routing(kvm, &routing, 0, 0);
case KVM_SET_DEVICE_ATTR: {
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
r = kvm_s390_vm_set_attr(kvm, &attr);
case KVM_GET_DEVICE_ATTR: {
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
r = kvm_s390_vm_get_attr(kvm, &attr);
case KVM_HAS_DEVICE_ATTR: {
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
r = kvm_s390_vm_has_attr(kvm, &attr);
case KVM_S390_GET_SKEYS: {
struct kvm_s390_skeys args;

if (copy_from_user(&args, argp,
sizeof(struct kvm_s390_skeys)))
r = kvm_s390_get_skeys(kvm, &args);
case KVM_S390_SET_SKEYS: {
struct kvm_s390_skeys args;

if (copy_from_user(&args, argp,
sizeof(struct kvm_s390_skeys)))
r = kvm_s390_set_skeys(kvm, &args);
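/*
 * Query the AP (adjunct processor) configuration via the PQAP(QCI)
 * instruction; the 128-byte config block is filled by the machine and
 * the condition code is returned.
 */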
static int kvm_s390_query_ap_config(u8 *config)
u32 fcn_code = 0x04000000UL;

memset(config, 0, 128);
".long 0xb2af0000\n" /* PQAP(QCI) */
: "r" (fcn_code), "r" (config)
: "cc", "0", "2", "memory"

static int kvm_s390_apxa_installed(void)
if (test_facility(2) && test_facility(12)) {
cc = kvm_s390_query_ap_config(config);

pr_err("PQAP(QCI) failed with cc=%d", cc);
return config[0] & 0x40;
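/*
 * The CRYCB descriptor carries the address of the crypto control
 * block; its format bits select FORMAT2 when APXA is installed and
 * FORMAT1 otherwise.
 */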
static void kvm_s390_set_crycb_format(struct kvm *kvm)
kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

if (kvm_s390_apxa_installed())
kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;

static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
cpu_id->version = 0xff;

static int kvm_s390_crypto_init(struct kvm *kvm)
if (!test_kvm_facility(kvm, 76))

kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
GFP_KERNEL | GFP_DMA);
if (!kvm->arch.crypto.crycb)

kvm_s390_set_crycb_format(kvm);

/* Enable AES/DEA protected key functions by default */
kvm->arch.crypto.aes_kw = 1;
kvm->arch.crypto.dea_kw = 1;
get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
char debug_name[16];
static unsigned long sca_offset;

#ifdef CONFIG_KVM_S390_UCONTROL
if (type & ~KVM_VM_S390_UCONTROL)
if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))

rc = s390_enable_sie();

kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);

spin_lock(&kvm_lock);
if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
kvm->arch.sca = (struct bsca_block *)
((char *) kvm->arch.sca + sca_offset);
spin_unlock(&kvm_lock);

sprintf(debug_name, "kvm-%u", current->pid);

kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));

/*
 * The architectural maximum amount of facilities is 16 kbit. To store
 * this amount, 2 kbyte of memory is required. Thus we need a full
 * page to hold the guest facility list (arch.model.fac->list) and the
 * facility mask (arch.model.fac->mask). Its address size has to be
 * 31 bits and word aligned.
 */
kvm->arch.model.fac =
(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
if (!kvm->arch.model.fac)

/* Populate the facility mask initially. */
memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
if (i < kvm_s390_fac_list_mask_size())
kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
kvm->arch.model.fac->mask[i] = 0UL;

/* Populate the facility list initially. */
memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);

kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
kvm->arch.model.ibc = sclp.ibc & 0x0fff;

if (kvm_s390_crypto_init(kvm) < 0)

spin_lock_init(&kvm->arch.float_int.lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++)
INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
init_waitqueue_head(&kvm->arch.ipte_wq);
mutex_init(&kvm->arch.ipte_mutex);

debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "vm created with type %lu", type);

if (type & KVM_VM_S390_UCONTROL) {
kvm->arch.gmap = NULL;
kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
if (!kvm->arch.gmap)
kvm->arch.gmap->private = kvm;
kvm->arch.gmap->pfault_enabled = 0;

kvm->arch.css_support = 0;
kvm->arch.use_irqchip = 0;
kvm->arch.epoch = 0;

spin_lock_init(&kvm->arch.start_stop_lock);
KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);

kfree(kvm->arch.crypto.crycb);
free_page((unsigned long)kvm->arch.model.fac);
debug_unregister(kvm->arch.dbf);
free_page((unsigned long)(kvm->arch.sca));
KVM_EVENT(3, "creation of vm failed: %d", rc);
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
kvm_s390_clear_local_irqs(vcpu);
kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_is_ucontrol(vcpu->kvm))

if (kvm_is_ucontrol(vcpu->kvm))
gmap_free(vcpu->arch.gmap);

if (vcpu->kvm->arch.use_cmma)
kvm_s390_vcpu_unsetup_cmma(vcpu);
free_page((unsigned long)(vcpu->arch.sie_block));

kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);

static void kvm_free_vcpus(struct kvm *kvm)
struct kvm_vcpu *vcpu;

kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arch_vcpu_destroy(vcpu);

mutex_lock(&kvm->lock);
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
kvm->vcpus[i] = NULL;

atomic_set(&kvm->online_vcpus, 0);
mutex_unlock(&kvm->lock);

void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_free_vcpus(kvm);
free_page((unsigned long)kvm->arch.model.fac);
free_page((unsigned long)(kvm->arch.sca));
debug_unregister(kvm->arch.dbf);
kfree(kvm->arch.crypto.crycb);
if (!kvm_is_ucontrol(kvm))
gmap_free(kvm->arch.gmap);
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
KVM_EVENT(3, "vm 0x%p destroyed", kvm);

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
if (!vcpu->arch.gmap)
vcpu->arch.gmap->private = vcpu->kvm;

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
struct bsca_block *sca = vcpu->kvm->arch.sca;

clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block)
sca->cpu[vcpu->vcpu_id].sda = 0;
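/*
 * Publish the vcpu's SIE control block in the system control area (SCA):
 * record the SDA entry, point the SIE block at the SCA origin (split
 * into high and low parts), and set the vcpu's bit in the mcn mask.
 */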
static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm,
struct bsca_block *sca = kvm->arch.sca;

if (!sca->cpu[id].sda)
sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
set_bit_inv(id, (unsigned long *) &sca->mcn);

static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
return id < KVM_MAX_VCPUS;

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
if (test_kvm_facility(vcpu->kvm, 129))
vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;

if (kvm_is_ucontrol(vcpu->kvm))
return __kvm_ucontrol_vcpu_init(vcpu);
/*
 * Backs up the current FP/VX register save area to a particular
 * destination. Used to switch between different register save
 * areas.
 */
static inline void save_fpu_to(struct fpu *dst)
dst->fpc = current->thread.fpu.fpc;
dst->regs = current->thread.fpu.regs;

/*
 * Switches the FP/VX register save area from which to lazily
 * restore register contents.
 */
static inline void load_fpu_from(struct fpu *from)
current->thread.fpu.fpc = from->fpc;
current->thread.fpu.regs = from->regs;

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
/* Save host register state */
save_fpu_to(&vcpu->arch.host_fpregs);

if (test_kvm_facility(vcpu->kvm, 129)) {
current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
/*
 * Use the register save area in the SIE-control block
 * for register restore and save in kvm_arch_vcpu_put()
 */
current->thread.fpu.vxrs =
(__vector128 *)&vcpu->run->s.regs.vrs;
load_fpu_from(&vcpu->arch.guest_fpregs);

if (test_fp_ctl(current->thread.fpu.fpc))
/* User space provided an invalid FPC, let's clear it */
current->thread.fpu.fpc = 0;

save_access_regs(vcpu->arch.host_acrs);
restore_access_regs(vcpu->run->s.regs.acrs);
gmap_enable(vcpu->arch.gmap);
atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
gmap_disable(vcpu->arch.gmap);

if (test_kvm_facility(vcpu->kvm, 129))
/*
 * kvm_arch_vcpu_load() set up the register save area to
 * &vcpu->run->s.regs.vrs and, thus, the vector registers
 * are already saved. Only the floating-point control must be
 * saved.
 */
vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
save_fpu_to(&vcpu->arch.guest_fpregs);
load_fpu_from(&vcpu->arch.host_fpregs);

save_access_regs(vcpu->run->s.regs.acrs);
restore_access_regs(vcpu->arch.host_acrs);
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
/* this equals initial cpu reset in pop, but we don't switch to ESA */
vcpu->arch.sie_block->gpsw.mask = 0UL;
vcpu->arch.sie_block->gpsw.addr = 0UL;
kvm_s390_set_prefix(vcpu, 0);
vcpu->arch.sie_block->cputm = 0UL;
vcpu->arch.sie_block->ckc = 0UL;
vcpu->arch.sie_block->todpr = 0;
memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
vcpu->arch.sie_block->gcr[0] = 0xE0UL;
vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
vcpu->arch.guest_fpregs.fpc = 0;
asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.sie_block->pp = 0;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
kvm_s390_vcpu_stop(vcpu);
kvm_s390_clear_local_irqs(vcpu);

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
mutex_lock(&vcpu->kvm->lock);
vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
mutex_unlock(&vcpu->kvm->lock);
if (!kvm_is_ucontrol(vcpu->kvm))
vcpu->arch.gmap = vcpu->kvm->arch.gmap;

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
if (!test_kvm_facility(vcpu->kvm, 76))

vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

if (vcpu->kvm->arch.crypto.aes_kw)
vcpu->arch.sie_block->ecb3 |= ECB3_AES;
if (vcpu->kvm->arch.crypto.dea_kw)
vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
free_page(vcpu->arch.sie_block->cbrlo);
vcpu->arch.sie_block->cbrlo = 0;

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
if (!vcpu->arch.sie_block->cbrlo)

vcpu->arch.sie_block->ecb2 |= 0x80;
vcpu->arch.sie_block->ecb2 &= ~0x08;
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

vcpu->arch.cpu_id = model->cpu_id;
vcpu->arch.sie_block->ibc = model->ibc;
vcpu->arch.sie_block->fac = (int) (long) model->fac->list;

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
if (test_kvm_facility(vcpu->kvm, 78))
atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
else if (test_kvm_facility(vcpu->kvm, 8))
atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

kvm_s390_vcpu_setup_model(vcpu);

vcpu->arch.sie_block->ecb = 6;
if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
vcpu->arch.sie_block->ecb |= 0x10;

vcpu->arch.sie_block->ecb2 = 8;
vcpu->arch.sie_block->eca = 0xC1002000U;
vcpu->arch.sie_block->eca |= 1;
if (sclp.has_sigpif)
vcpu->arch.sie_block->eca |= 0x10000000U;
if (test_kvm_facility(vcpu->kvm, 129)) {
vcpu->arch.sie_block->eca |= 0x00020000;
vcpu->arch.sie_block->ecd |= 0x20000000;
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

if (vcpu->kvm->arch.use_cmma) {
rc = kvm_s390_vcpu_setup_cmma(vcpu);
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

kvm_s390_vcpu_crypto_setup(vcpu);

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
struct kvm_vcpu *vcpu;
struct sie_page *sie_page;

if (!sca_can_add_vcpu(kvm, id))

vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);

sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);

vcpu->arch.sie_block = &sie_page->sie_block;
vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

vcpu->arch.sie_block->icpua = id;
if (!kvm_is_ucontrol(kvm)) {
if (!kvm->arch.sca) {
sca_add_vcpu(vcpu, kvm, id);

spin_lock_init(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

/*
 * Allocate a save area for floating-point registers. If the vector
 * extension is available, register contents are saved in the SIE
 * control block. The allocated save area is still required in
 * particular places, for example, in kvm_s390_vcpu_store_status().
 */
vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
if (!vcpu->arch.guest_fpregs.fprs) {
goto out_free_sie_block;

rc = kvm_vcpu_init(vcpu, kvm, id);
goto out_free_sie_block;
VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
vcpu->arch.sie_block);
trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

free_page((unsigned long)(vcpu->arch.sie_block));
kmem_cache_free(kvm_vcpu_cache, vcpu);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return kvm_s390_vcpu_has_irq(vcpu, 0);

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
kvm_make_request(req, vcpu);
kvm_s390_vcpu_request(vcpu);

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
struct kvm *kvm = gmap->private;
struct kvm_vcpu *vcpu;

kvm_for_each_vcpu(i, vcpu, kvm) {
/* match against both prefix pages */
if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
/* kvm common code refers to this, but never calls it */

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
struct kvm_one_reg *reg)
case KVM_REG_S390_TODPR:
r = put_user(vcpu->arch.sie_block->todpr,
(u32 __user *)reg->addr);
case KVM_REG_S390_EPOCHDIFF:
r = put_user(vcpu->arch.sie_block->epoch,
(u64 __user *)reg->addr);
case KVM_REG_S390_CPU_TIMER:
r = put_user(vcpu->arch.sie_block->cputm,
(u64 __user *)reg->addr);
case KVM_REG_S390_CLOCK_COMP:
r = put_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFTOKEN:
r = put_user(vcpu->arch.pfault_token,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFCOMPARE:
r = put_user(vcpu->arch.pfault_compare,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFSELECT:
r = put_user(vcpu->arch.pfault_select,
(u64 __user *)reg->addr);
case KVM_REG_S390_PP:
r = put_user(vcpu->arch.sie_block->pp,
(u64 __user *)reg->addr);
case KVM_REG_S390_GBEA:
r = put_user(vcpu->arch.sie_block->gbea,
(u64 __user *)reg->addr);
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
struct kvm_one_reg *reg)
case KVM_REG_S390_TODPR:
r = get_user(vcpu->arch.sie_block->todpr,
(u32 __user *)reg->addr);
case KVM_REG_S390_EPOCHDIFF:
r = get_user(vcpu->arch.sie_block->epoch,
(u64 __user *)reg->addr);
case KVM_REG_S390_CPU_TIMER:
r = get_user(vcpu->arch.sie_block->cputm,
(u64 __user *)reg->addr);
case KVM_REG_S390_CLOCK_COMP:
r = get_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFTOKEN:
r = get_user(vcpu->arch.pfault_token,
(u64 __user *)reg->addr);
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
kvm_clear_async_pf_completion_queue(vcpu);
case KVM_REG_S390_PFCOMPARE:
r = get_user(vcpu->arch.pfault_compare,
(u64 __user *)reg->addr);
case KVM_REG_S390_PFSELECT:
r = get_user(vcpu->arch.pfault_select,
(u64 __user *)reg->addr);
case KVM_REG_S390_PP:
r = get_user(vcpu->arch.sie_block->pp,
(u64 __user *)reg->addr);
case KVM_REG_S390_GBEA:
r = get_user(vcpu->arch.sie_block->gbea,
(u64 __user *)reg->addr);
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_initial_reset(vcpu);

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
restore_access_regs(vcpu->run->s.regs.acrs);

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
if (test_fp_ctl(fpu->fpc))
memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
vcpu->arch.guest_fpregs.fpc = fpu->fpc;
load_fpu_from(&vcpu->arch.guest_fpregs);

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
fpu->fpc = vcpu->arch.guest_fpregs.fpc;
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
if (!is_vcpu_stopped(vcpu))

vcpu->run->psw_mask = psw.mask;
vcpu->run->psw_addr = psw.addr;

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
return -EINVAL; /* not implemented yet */

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
KVM_GUESTDBG_USE_HW_BP | \
KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
vcpu->guest_debug = 0;
kvm_s390_clear_bp_data(vcpu);

if (dbg->control & ~VALID_GUESTDBG_FLAGS)

if (dbg->control & KVM_GUESTDBG_ENABLE) {
vcpu->guest_debug = dbg->control;
/* enforce guest PER */
atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
rc = kvm_s390_import_bp_data(vcpu, dbg);
atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
vcpu->arch.guestdbg.last_bp = 0;

vcpu->guest_debug = 0;
kvm_s390_clear_bp_data(vcpu);
atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
/* CHECK_STOP and LOAD are not supported yet */
return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
KVM_MP_STATE_OPERATING;

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
/* user space knows about this interface - let it control the state */
vcpu->kvm->arch.user_cpu_state_ctrl = 1;

switch (mp_state->mp_state) {
case KVM_MP_STATE_STOPPED:
kvm_s390_vcpu_stop(vcpu);
case KVM_MP_STATE_OPERATING:
kvm_s390_vcpu_start(vcpu);
case KVM_MP_STATE_LOAD:
case KVM_MP_STATE_CHECK_STOP:
/* fall through - CHECK_STOP and LOAD are not supported yet */

static bool ibs_enabled(struct kvm_vcpu *vcpu)
return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
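/*
 * Process pending vcpu requests before (re-)entering SIE: re-arm the
 * ipte notifier for the prefix page on MMU_RELOAD, invalidate ihcpu on
 * TLB_FLUSH, and toggle the IBS cpuflag for ENABLE/DISABLE_IBS.
 */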
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_request_handled(vcpu);
if (!vcpu->requests)

/*
 * We use MMU_RELOAD just to re-arm the ipte notifier for the
 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
 * This ensures that the ipte instruction for this request has
 * already finished. We might race against a second unmapper that
 * wants to set the blocking bit. Let's just retry the request loop.
 */
if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
rc = gmap_ipte_notify(vcpu->arch.gmap,
kvm_s390_get_prefix(vcpu),

if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
vcpu->arch.sie_block->ihcpu = 0xffff;

if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
if (!ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
atomic_or(CPUSTAT_IBS,
&vcpu->arch.sie_block->cpuflags);

if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
if (ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
atomic_andnot(CPUSTAT_IBS,
&vcpu->arch.sie_block->cpuflags);

/* nothing to do, just clear the request */
clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
struct kvm_vcpu *vcpu;

mutex_lock(&kvm->lock);
kvm->arch.epoch = tod - get_tod_clock();
kvm_s390_vcpu_block_all(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
vcpu->arch.sie_block->epoch = kvm->arch.epoch;
kvm_s390_vcpu_unblock_all(kvm);
mutex_unlock(&kvm->lock);
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
return gmap_fault(vcpu->arch.gmap, gpa,
writable ? FAULT_FLAG_WRITE : 0);
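/*
 * Inject a pfault token: the INIT notification goes to the vcpu as a
 * local interrupt, the DONE notification is injected into the VM as a
 * floating interrupt.
 */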
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
unsigned long token)
struct kvm_s390_interrupt inti;
struct kvm_s390_irq irq;

irq.u.ext.ext_params2 = token;
irq.type = KVM_S390_INT_PFAULT_INIT;
WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
inti.type = KVM_S390_INT_PFAULT_DONE;
inti.parm64 = token;
WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
/* s390 will always inject the page directly */

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
/*
 * s390 will always inject the page directly,
 * but we still want check_async_completion to clean up
 */

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
struct kvm_arch_async_pf arch;

if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
vcpu->arch.pfault_compare)
if (psw_extint_disabled(vcpu))
if (kvm_s390_vcpu_has_irq(vcpu, 0))
if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
if (!vcpu->arch.gmap->pfault_enabled)

hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
hva += current->thread.gmap_addr & ~PAGE_MASK;
if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))

rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
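/*
 * Prepare the vcpu for entering SIE: handle completed pfaults, deliver
 * pending interrupts, process requests, and patch in guest debugging
 * state where enabled.
 */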
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
/*
 * On s390 notifications for arriving pages will be delivered directly
 * to the guest but the housekeeping for completed pfaults is
 * handled outside the worker.
 */
kvm_check_async_pf_completion(vcpu);

memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);

if (test_cpu_flag(CIF_MCCK_PENDING))

if (!kvm_is_ucontrol(vcpu->kvm)) {
rc = kvm_s390_deliver_pending_interrupts(vcpu);

rc = kvm_s390_handle_requests(vcpu);

if (guestdbg_enabled(vcpu)) {
kvm_s390_backup_guest_per_regs(vcpu);
kvm_s390_patch_guest_per_regs(vcpu);

vcpu->arch.sie_block->icptcode = 0;
cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
trace_kvm_s390_sie_enter(vcpu, cpuflags);
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
psw_t *psw = &vcpu->arch.sie_block->gpsw;

VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
trace_kvm_s390_sie_fault(vcpu);

/*
 * We want to inject an addressing exception, which is defined as a
 * suppressing or terminating exception. However, since we came here
 * by a DAT access exception, the PSW still points to the faulting
 * instruction since DAT exceptions are nullifying. So we've got
 * to look up the current opcode to get the length of the instruction
 * to be able to forward the PSW.
 */
rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
return kvm_s390_inject_prog_cond(vcpu, rc);
psw->addr = __rewind_psw(*psw, -insn_length(opcode));

return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
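/*
 * Triage a SIE exit: intercepts (icptcode > 0) go to the intercept
 * handlers, non-fault exits count as exit_null, ucontrol translation
 * faults are reported to userspace, guest pfaults are resolved
 * asynchronously or faulted in, and anything else is a fault in SIE.
 */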
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

if (guestdbg_enabled(vcpu))
kvm_s390_restore_guest_per_regs(vcpu);

memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);

if (vcpu->arch.sie_block->icptcode > 0) {
int rc = kvm_handle_sie_intercept(vcpu);

if (rc != -EOPNOTSUPP)
vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
} else if (exit_reason != -EFAULT) {
vcpu->stat.exit_null++;
} else if (kvm_is_ucontrol(vcpu->kvm)) {
vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
vcpu->run->s390_ucontrol.trans_exc_code =
current->thread.gmap_addr;
vcpu->run->s390_ucontrol.pgm_code = 0x10;
} else if (current->thread.gmap_pfault) {
trace_kvm_s390_major_guest_pfault(vcpu);
current->thread.gmap_pfault = 0;
if (kvm_arch_setup_async_pf(vcpu))
return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);

return vcpu_post_run_fault_in_sie(vcpu);
static int __vcpu_run(struct kvm_vcpu *vcpu)
int rc, exit_reason;

/*
 * We try to hold kvm->srcu during most of vcpu_run (except when run-
 * ning the guest), so that memslots (and other stuff) are protected.
 */
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

rc = vcpu_pre_run(vcpu);

srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/*
 * As PF_VCPU will be used in the fault handler, there must be no
 * uaccess between guest_enter and guest_exit.
 */
local_irq_disable();
__kvm_guest_enter();
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
local_irq_disable();

vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

rc = vcpu_post_run(vcpu, exit_reason);
} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
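/*
 * Copy register state that userspace may have dirtied in kvm_run into
 * the SIE control block and the architecture-specific pfault fields
 * before entering the guest.
 */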
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
/* some control register changes require a tlb flush */
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
vcpu->arch.pfault_token = kvm_run->s.regs.pft;
vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
kvm_clear_async_pf_completion_queue(vcpu);
kvm_run->kvm_dirty_regs = 0;

static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
kvm_run->s.regs.pft = vcpu->arch.pfault_token;
kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (guestdbg_exit_pending(vcpu)) {
kvm_s390_prepare_debug_exit(vcpu);

if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
kvm_s390_vcpu_start(vcpu);
} else if (is_vcpu_stopped(vcpu)) {
pr_err_ratelimited("can't run stopped vcpu %d\n",

sync_regs(vcpu, kvm_run);

rc = __vcpu_run(vcpu);

if (signal_pending(current) && !rc) {
kvm_run->exit_reason = KVM_EXIT_INTR;

if (guestdbg_exit_pending(vcpu) && !rc) {
kvm_s390_prepare_debug_exit(vcpu);

if (rc == -EREMOTE) {
/* userspace support is needed, kvm_run has been prepared */

store_regs(vcpu, kvm_run);

if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);

vcpu->stat.exit_userspace++;
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
unsigned char archmode = 1;

if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
if (write_guest_abs(vcpu, 163, &archmode, 1))
gpa = SAVE_AREA_BASE;
} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
if (write_guest_real(vcpu, 163, &archmode, 1))
gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
vcpu->arch.guest_fpregs.fprs, 128);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
vcpu->run->s.regs.gprs, 128);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
&vcpu->arch.sie_block->gpsw, 16);
px = kvm_s390_get_prefix(vcpu);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
rc |= write_guest_abs(vcpu,
gpa + offsetof(struct save_area, fp_ctrl_reg),
&vcpu->arch.guest_fpregs.fpc, 4);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
&vcpu->arch.sie_block->todpr, 4);
clkcomp = vcpu->arch.sie_block->ckc >> 8;
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
&vcpu->run->s.regs.acrs, 64);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
&vcpu->arch.sie_block->gcr, 128);
return rc ? -EFAULT : 0;
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
/*
 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
 * copying in vcpu load/put. Let's update our copies before we save
 * them into the save area.
 */
if (test_kvm_facility(vcpu->kvm, 129)) {
/*
 * If the vector extension is available, the vector registers
 * which overlap with the floating-point registers are saved in
 * the SIE-control block. Hence, extract the floating-point
 * registers and the FPC value and store them in the
 * guest_fpregs structure.
 */
vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
current->thread.fpu.vxrs);
save_fpu_to(&vcpu->arch.guest_fpregs);
save_access_regs(vcpu->run->s.regs.acrs);

return kvm_s390_store_status_unloaded(vcpu, addr);

/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
/* Only bits 0-53 are used for address formation */
if (!(gpa & ~0x3ff))

return write_guest_abs(vcpu, gpa & ~0x3ff,
(void *)&vcpu->run->s.regs.vrs, 512);

int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
if (!test_kvm_facility(vcpu->kvm, 129))

/*
 * The guest VXRS are in the host VXRS due to the lazy
 * copying in vcpu load/put. We can simply call save_fpu_regs()
 * to save the current register state because we are in the
 * middle of a load/put cycle.
 *
 * Let's update our copies before we save it into the save area.
 */

return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
/* Clear any stale opposite request, then post the new IBS request. */
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		__disable_ibs_on_vcpu(vcpu);
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

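/*
 * Illustrative sketch (not part of the original file): userspace can request
 * the transitions above via the KVM_SET_MP_STATE vcpu ioctl, where (on this
 * kernel) KVM_MP_STATE_OPERATING is routed to kvm_s390_vcpu_start() and
 * KVM_MP_STATE_STOPPED to kvm_s390_vcpu_stop(). vcpu_fd is hypothetical.
 *
 *	struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_OPERATING };
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
 */
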
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		return 0;
	default:
		return -EINVAL;
	}
}

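/*
 * Illustrative sketch (not part of the original file): enabling the CSS
 * support capability handled above from userspace. The flags field must be
 * zero or the ioctl fails with -EINVAL. vcpu_fd is hypothetical.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
 *		perror("KVM_ENABLE_CAP");
 */
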
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;
	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, false);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0 && copy_to_user(uaddr, tmpbuf, mop->size))
			r = -EFAULT;
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, true);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}
	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

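/*
 * Illustrative sketch (not part of the original file): a logical read of
 * guest memory through the KVM_S390_MEM_OP ioctl serviced above. Setting
 * KVM_S390_MEMOP_F_CHECK_ONLY in .flags would only translate and check the
 * range without copying any data. vcpu_fd is hypothetical.
 *
 *	unsigned char buf[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x1000,		(guest logical address)
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (unsigned long)buf,
 *		.ar    = 0,			(access register number)
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 */
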
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = -EINVAL;
		if (s390int_to_s390irq(&s390int, &s390irq))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		r = -EFAULT;
		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap)))
			break;
		r = -EINVAL;
		if (!kvm_is_ucontrol(vcpu->kvm))
			break;
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		r = -EFAULT;
		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap)))
			break;
		r = -EINVAL;
		if (!kvm_is_ucontrol(vcpu->kvm))
			break;
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *)irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *)irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

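/*
 * Illustrative sketch (not part of the original file): saving and restoring
 * pending interrupts for migration with the two irq_state ioctls handled
 * above. On the set side, len must be a non-zero multiple of
 * sizeof(struct kvm_s390_irq) no larger than VCPU_IRQS_MAX_BUF; on success
 * the get ioctl returns the number of bytes copied. vcpu_fd is hypothetical.
 *
 *	struct kvm_s390_irq irqs[64];
 *	struct kvm_s390_irq_state st = {
 *		.buf = (unsigned long)irqs,
 *		.len = sizeof(irqs),
 *	};
 *	int n = ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &st);
 *		(transfer the first n bytes of irqs to the destination)
 *	st.len = n;
 *	ioctl(vcpu_fd, KVM_S390_SET_IRQ_STATE, &st);
 */
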
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

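/*
 * Illustrative sketch (not part of the original file): for user-controlled
 * VMs, the fault handler above lets userspace map the vcpu's SIE control
 * block by mmap()ing the vcpu fd at KVM_S390_SIE_PAGE_OFFSET pages.
 * vcpu_fd and page_size are hypothetical.
 *
 *	void *sie = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, vcpu_fd,
 *			 KVM_S390_SIE_PAGE_OFFSET * page_size);
 */
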
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/*
	 * A few sanity checks. Memory slots have to start and end on a
	 * segment boundary (1MB). The memory in userland may be fragmented
	 * into various different vmas, and it is fine to mmap() and munmap()
	 * within this slot at any time after this call.
	 */
	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;
	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	return 0;
}

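/*
 * Illustrative sketch (not part of the original file): a memslot layout that
 * passes the 1MB segment-alignment checks above. mmap() only guarantees page
 * alignment, so real code must ensure the address is 1MB-aligned (e.g. by
 * over-allocating and rounding up). vm_fd is hypothetical.
 *
 *	size_t len = 256 << 20;		(256MB, a multiple of 1MB)
 *	void *mem = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	struct kvm_userspace_memory_region reg = {
 *		.slot = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size = len,
 *		.userspace_addr = (unsigned long)mem,
 *	};
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &reg);
 */
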
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/*
	 * If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
}

static int __init kvm_s390_init(void)
{
	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");