/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/etr.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
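/*
 * Illustrative note, not part of the original file: the counters above are
 * exported through the common KVM debugfs code, so with debugfs mounted
 * each entry can be read as a plain text file named after its string
 * above, e.g.:
 *
 *	cat /sys/kernel/debug/kvm/exit_instruction
 */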
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
	0xffe6fffbfcfdfc40UL,
	0x005e800000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
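/*
 * Facility numbers use MSB-0 bit numbering: facility nr lives in doubleword
 * nr / 64, at bit nr & 63 counted from the most significant bit. A
 * hypothetical helper (illustrative only, not part of this file) to test
 * the mask above could look like this:
 *
 *	static inline int fac_in_mask(unsigned long nr)
 *	{
 *		if (nr >= kvm_s390_fac_list_mask_size() * 64)
 *			return 0;
 *		return (kvm_s390_fac_list_mask[nr >> 6] >>
 *			(63 - (nr & 63))) & 1;
 *	}
 */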
static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
				  : KVM_S390_BSCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	default:
		r = 0;
	}
	return r;
}
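/*
 * Illustrative userspace probe, not part of this file: extensions are
 * queried with the KVM_CHECK_EXTENSION ioctl, where kvm_fd is assumed to
 * be an open file descriptor for /dev/kvm:
 *
 *	int slots = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_MEMSLOTS);
 */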
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (gmap_test_and_clear_dirty(address, gmap))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
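/*
 * Illustrative only: userspace retrieves the log with KVM_GET_DIRTY_LOG on
 * the VM fd; one bit per guest page of the slot is returned. vm_fd and
 * bitmap are assumptions (a VM fd and a sufficiently large buffer):
 *
 *	struct kvm_dirty_log log = { .slot = 0, .dirty_bitmap = bitmap };
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 */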
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac->mask, 129);
			set_kvm_facility(kvm->arch.model.fac->list, 129);
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac->mask, 64);
			set_kvm_facility(kvm->arch.model.fac->list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
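/*
 * A minimal userspace sketch (illustrative, vm_fd is an assumed VM file
 * descriptor): VM capabilities are switched on with KVM_ENABLE_CAP on the
 * VM fd; as the code above shows, the vector and runtime-instrumentation
 * capabilities must be enabled before the first VCPU is created:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */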
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		/* enable CMMA only for z10 and later (EDAT_1) */
		ret = -EINVAL;
		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_alloc takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			/* gmap_alloc will round the limit up */
			struct gmap *new = gmap_alloc(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_free(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
					 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
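/*
 * Illustrative only: the TOD handlers above sit behind the generic
 * KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR VM ioctls, e.g. for the low
 * (base) part of the guest TOD clock (vm_fd and tod are assumptions):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)&tod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */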
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (atomic_read(&kvm->online_vcpus)) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
		       sizeof(struct cpuid));
		kvm->arch.model.ibc = proc->ibc;
		memcpy(kvm->arch.model.fac->list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	unsigned long curkey;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		curkey = get_guest_storage_key(current->mm, hva);
		if (IS_ERR_VALUE(curkey)) {
			r = curkey;
			goto out;
		}
		keys[i] = curkey;
	}

	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
			 sizeof(uint8_t) * args->count);
	if (r)
		r = -EFAULT;
out:
	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			goto out;
		}

		r = set_guest_storage_key(current->mm, hva,
					  (unsigned long)keys[i], 0);
		if (r)
			goto out;
	}
out:
	kvfree(keys);
	return r;
}
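/*
 * Illustrative userspace call, not part of this file: fetching one storage
 * key per 4K page for the first 16 guest frames; vm_fd is an assumed VM
 * file descriptor:
 *
 *	uint8_t buf[16];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 16,
 *		.skeydata_addr = (__u64)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */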
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
{
	get_cpu_id(cpu_id);
	cpu_id->version = 0xff;
}

static int kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return 0;

	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
					 GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.crypto.crycb)
		return -ENOMEM;

	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));

	return 0;
}
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.use_esca = 0; /* start with basic SCA */
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	/*
	 * The architectural maximum amount of facilities is 16 kbit. To store
	 * this amount, 2 kbyte of memory is required. Thus we need a full
	 * page to hold the guest facility list (arch.model.fac->list) and the
	 * facility mask (arch.model.fac->mask). Its address size has to be
	 * 31 bits and word aligned.
	 */
	kvm->arch.model.fac =
		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.model.fac)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac->mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	if (kvm_s390_crypto_init(kvm) < 0)
		goto out_err;

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_MAX_SIZE;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	kfree(kvm->arch.crypto.crycb);
	free_page((unsigned long)kvm->arch.model.fac);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	free_page((unsigned long)kvm->arch.model.fac);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	kfree(kvm->arch.crypto.crycb);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}

static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Save host register state */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;

	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);

	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;

	/* Restore host register state */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm = 0UL;
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.cpu_id = model->cpu_id;
	vcpu->arch.sie_block->ibc = model->ibc;
	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	vcpu->arch.sie_block->ecb = 6;
	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	vcpu->arch.sie_block->ecb2 = 8;
	vcpu->arch.sie_block->eca = 0xC1002000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->arch.sie_block->ecb3 |= 0x01;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
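/*
 * Illustrative only: both handlers above back the KVM_GET_ONE_REG and
 * KVM_SET_ONE_REG VCPU ioctls; vcpu_fd is an assumed VCPU file descriptor:
 *
 *	__u64 token;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_PFTOKEN,
 *		.addr = (__u64)&token,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */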
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure the new values will be lazily loaded */
	save_fpu_regs();
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	current->thread.fpu.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
	else
		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
	else
		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
	fpu->fpc = current->thread.fpu.fpc;
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
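/*
 * Illustrative only: once user_cpu_state_ctrl is set, userspace controls
 * the stopped/operating state through KVM_SET_MP_STATE on the VCPU fd
 * (vcpu_fd is an assumption):
 *
 *	struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_STOPPED };
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);
 */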
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      kvm_s390_get_prefix(vcpu),
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		__kvm_guest_enter();
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__kvm_guest_exit();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	kvm_run->kvm_dirty_regs = 0;
}

static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
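/*
 * Illustrative userspace run loop, not part of this file: kvm_run is the
 * mmap'ed communication page of the VCPU fd (size obtained with
 * KVM_GET_VCPU_MMAP_SIZE); KVM_RUN re-enters SIE until an exit needs
 * userspace attention. vcpu_fd and mmap_size are assumptions:
 *
 *	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *	while (!ioctl(vcpu_fd, KVM_RUN, 0)) {
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			break;	 handle intercept in userspace
 *	}
 */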
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &vcpu->arch.sie_block->cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

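/*
 * kvm_s390_vcpu_store_status() is the variant to use while the vcpu is
 * loaded: it first flushes the lazily held FP and access register state
 * back into the vcpu's copies, then delegates to the _unloaded helper.
 */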
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * it into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}

int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;

	/*
	 * The guest VXRS are in the host VXRs due to the lazy
	 * copying in vcpu load/put. We can simply call save_fpu_regs()
	 * to save the current register state because we are in the
	 * middle of a load/put cycle.
	 *
	 * Let's update our copies before we save it into the save area.
	 */
	save_fpu_regs();

	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}

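/*
 * The IBS facility is a performance optimization that is only useful
 * while exactly one VCPU of the guest is started (see the "speed it up"
 * logic in kvm_s390_vcpu_start/stop below). The helpers queue a
 * synchronous request so the target VCPU toggles IBS on its next entry,
 * cancelling any not-yet-processed opposite request first.
 */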
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

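/*
 * kvm_s390_vcpu_start() moves a VCPU out of the STOPPED state. It also
 * decides whether IBS can stay enabled: if no other VCPU is started, the
 * starting one gets IBS, while starting a second VCPU forces IBS off on
 * all VCPUs.
 */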
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

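/*
 * kvm_s390_vcpu_stop() is the inverse: the stopping VCPU loses IBS, and
 * if exactly one started VCPU remains afterwards, that survivor gets IBS
 * enabled to speed it up.
 */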
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

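/*
 * Per-VCPU capability enablement; only KVM_CAP_S390_CSS_SUPPORT is
 * handled here. Illustrative userspace sketch (hypothetical vcpu_fd,
 * untested):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */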
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;
	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

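/*
 * Backend of the KVM_S390_MEM_OP vcpu ioctl: reads or writes guest
 * logical memory through a bounce buffer, or merely checks accessibility
 * when KVM_S390_MEMOP_F_CHECK_ONLY is set. Illustrative userspace sketch
 * (hypothetical vcpu_fd/guest_addr/buffer names, untested):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr	= guest_addr,
 *		.size	= len,
 *		.op	= KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf	= (__u64)(uintptr_t)buffer,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */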
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

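/*
 * Dispatcher for the s390-specific vcpu ioctls. Each case copies its
 * argument structure in from userspace, performs the operation, and
 * returns a result code (or -EFAULT if the copy fails).
 */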
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

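/*
 * For user-controlled VMs the SIE control block page can be mapped into
 * userspace at KVM_S390_SIE_PAGE_OFFSET; any other fault is refused.
 */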
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

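/* s390 keeps no architecture-specific per-memslot data, so there is
 * nothing to allocate here.
 */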
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	   segment boundary (1MB). The userland memory may be fragmented
	   across multiple vmas, and it is fine to mmap() and munmap()
	   ranges within this slot at any time after this call. */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

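/*
 * Called once a memslot update is committed: maps the new userspace
 * range into the guest address space via the gmap, unless the slot is
 * unchanged in every aspect the gmap cares about.
 */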
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

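/*
 * Module init: KVM on s390 strictly depends on the SIE facility
 * (sclp.has_sief2); if the hardware or hypervisor does not offer it,
 * the module refuses to load.
 */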
static int __init kvm_s390_init(void)
{
	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c,
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");