/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
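/*
 * Illustrative expansion (for reference only, not in the original):
 * VCPU_STAT(exit_null) becomes
 *	offsetof(struct kvm_vcpu, stat.exit_null), KVM_STAT_VCPU
 * which supplies the offset/type pair consumed by debugfs_entries[] below.
 */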
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
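/*
 * Minimal illustrative sketch (not part of the original file): per the
 * comment on kvm_clock_sync(), any reader of the epoch pairs the access
 * with disabled preemption so stop_machine() cannot interrupt the
 * calculation halfway. The helper name is hypothetical.
 */
static inline u64 example_read_epoch(struct kvm_vcpu *vcpu)
{
	u64 epoch;

	preempt_disable();	/* keep the TOD sync notifier away */
	epoch = vcpu->arch.sie_block->epoch;
	preempt_enable();
	return epoch;
}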
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
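/*
 * Illustrative note (not in the original): the PLO loop above stores the
 * query result MSB-first, e.g. test bit 0 sets plo[0] & 0x80 and test
 * bit 9 sets plo[1] & 0x40, matching the machine's bit numbering.
 */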
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	default:
		r = 0;
	}
	return r;
}
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}
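/*
 * Illustrative userspace sketch (not part of this file): the machine
 * feature bitmap above is reachable through the VM device-attribute
 * interface dispatched by kvm_s390_vm_get_attr() below. "vm_fd" is an
 * assumed open KVM VM file descriptor.
 *
 *	struct kvm_s390_vm_cpu_feat feat;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE_FEAT,
 *		.addr  = (__u64)&feat,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr) == 0)
 *		feat.feat[] now holds the host-supported feature bits.
 */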
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
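/*
 * Illustrative userspace sketch (not part of this file): reading 64
 * storage keys starting at guest frame 0 via the VM ioctl handled in
 * kvm_arch_vm_ioctl() below. "vm_fd" is an assumed VM file descriptor.
 *
 *	uint8_t skeys[64];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 64,
 *		.skeydata_addr = (uint64_t)skeys,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */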
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}

static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
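/*
 * Note (illustrative, not in the original): each VM thus starts with
 * fresh random AES/DEA wrapping-key masks; user space can later
 * regenerate or clear them through the KVM_S390_VM_CRYPTO_* attributes
 * handled in kvm_s390_vm_set_crypto() above.
 */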
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
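/*
 * Note (illustrative, not in the original): VCPU ids that fit the basic
 * SCA (id < KVM_S390_BSCA_CPU_SLOTS, i.e. 64 entries) never trigger a
 * switch; the first id beyond that converts the VM to the extended SCA
 * with KVM_S390_ESCA_CPU_SLOTS (248) entries, provided sclp reports
 * ESCA and 64-bit SCAO support.
 */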
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}

/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
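/*
 * Note (illustrative, not in the original): the seqcount above lets a
 * remote thread, e.g. the KVM_REG_S390_CPU_TIMER one-reg getter below,
 * read a consistent (cputm, cputm_start) pair without taking a lock;
 * it simply retries while the VCPU thread is inside a write section.
 */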
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{

	gmap_enable(vcpu->arch.enabled_gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= 0x02;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= 0x04;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= 0x08;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= 0x20;
	vcpu->arch.sie_block->eca = 0x1002000U;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= 0x80000000U;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= 0x40000000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)
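/*
 * Illustrative userspace sketch (not part of this file): enabling
 * hardware breakpoints via the flags accepted above. "vcpu_fd" is an
 * assumed VCPU file descriptor.
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
 *		.arch = { .nr_hw_bp = ..., .hw_bp = ... },
 *	};
 *
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */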
2317 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2318 struct kvm_guest_debug *dbg)
2322 vcpu->guest_debug = 0;
2323 kvm_s390_clear_bp_data(vcpu);
2325 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2327 if (!sclp.has_gpere)
2330 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2331 vcpu->guest_debug = dbg->control;
2332 /* enforce guest PER */
2333 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2335 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2336 rc = kvm_s390_import_bp_data(vcpu, dbg);
2338 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2339 vcpu->arch.guestdbg.last_bp = 0;
2343 vcpu->guest_debug = 0;
2344 kvm_s390_clear_bp_data(vcpu);
2345 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
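/*
 * Example: a hypothetical debugger frontend (vcpu_fd is a placeholder)
 * would enable single-stepping through this ioctl roughly as follows;
 * passing .control = 0 later disables debugging and clears guest PER:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */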
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
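/*
 * Example: once userspace calls this ioctl it owns the start/stop state;
 * a hypothetical caller (vcpu_fd is a placeholder) stops a VCPU with:
 *
 *	struct kvm_mp_state mp_state = { .mp_state = KVM_MP_STATE_STOPPED };
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp_state);
 */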
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
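/*
 * The epoch is the value SIE adds to the host TOD clock on every guest
 * clock read, so after the assignment above the guest observes roughly:
 *
 *	guest_tod = get_tod_clock() + (tod - get_tod_clock() at set time)
 *
 * i.e. a clock that read 'tod' at the moment userspace made the request.
 */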
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to clean up
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;
	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;
	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
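/*
 * Summary of the bail-out checks above: the pfault handshake may only be
 * used when the guest registered a valid token, the current PSW mask
 * matches the guest's pfault_select/pfault_compare filter, external
 * interrupts are enabled including the CR0 subclass bit (0x200ul) that
 * the code above requires, no interrupt is already pending, and userspace
 * enabled pfault on the gmap. Otherwise the page is faulted in
 * synchronously instead of notifying the guest asynchronously.
 */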
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64)) {
		struct runtime_instr_cb *riccb =
			(struct runtime_instr_cb *) &kvm_run->s.regs.riccb;

		if (riccb->valid)
			vcpu->arch.sie_block->ecb3 |= 0x01;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	kvm_run->kvm_dirty_regs = 0;
}
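/*
 * Example: userspace drives sync_regs() by marking fields dirty before
 * KVM_RUN. A hypothetical caller (vcpu_fd and the mmap'ed 'run' are
 * placeholders) that changed a control register would do:
 *
 *	run->s.regs.crs[0] = new_cr0;
 *	run->kvm_dirty_regs |= KVM_SYNC_CRS;
 *	ioctl(vcpu_fd, KVM_RUN, 0);
 */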
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (kvm_run->immediate_exit)
		return -EINTR;
	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}
	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}
	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}
	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);
	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
	vcpu->stat.exit_userspace++;
	return rc;
}
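/*
 * Example: the usual userspace loop around this ioctl. vcpu_fd and the
 * mmap'ed 'run' structure are placeholders, handle_intercept() is a
 * hypothetical helper, and most exit reasons are elided:
 *
 *	while (ioctl(vcpu_fd, KVM_RUN, 0) >= 0 || errno == EINTR) {
 *		switch (run->exit_reason) {
 *		case KVM_EXIT_S390_SIEIC:
 *			handle_intercept(run);
 *			break;
 *		case KVM_EXIT_INTR:
 *			break;
 *		}
 *	}
 */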
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
	}
	return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
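/*
 * Example: a hypothetical userspace read of 256 bytes of guest logical
 * memory through this interface (vcpu_fd and buf are placeholders):
 *
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr = guest_addr,
 *		.size  = 256,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buf,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop);
 */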
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;
		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;
		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;
		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
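/*
 * Worked example: for i == 0, nonhyp_fai is the two most significant bits
 * of sclp.hmfai. With nonhyp_fai == 2 the mask is
 *
 *	0x0000ffffffffffffUL >> (2 << 4) == 0x0000ffffffffffffUL >> 32
 *	                                 == 0x000000000000ffffUL
 *
 * so only the corresponding subset of facility bits in that doubleword
 * can be passed through to guests.
 */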
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");