/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
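
/*
 * The entries above surface as per-stat files once the module is loaded,
 * typically under /sys/kernel/debug/kvm/ (the mount point and the exact
 * layout are assumptions about the usual debugfs setup, not guaranteed
 * here). A minimal userspace read sketch, error handling omitted:
 *
 *	FILE *f = fopen("/sys/kernel/debug/kvm/exit_instruction", "r");
 *	unsigned long long count;
 *	if (f && fscanf(f, "%llu", &count) == 1)
 *		printf("instruction exits: %llu\n", count);
 */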

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}
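
/*
 * The adjustment above keeps the guest view of the TOD clock stable: the
 * guest clock is host_tod + epoch, so when STP steps the host clock by
 * delta, subtracting delta from the epoch cancels the step. A worked
 * sketch of that invariant:
 *
 *	guest_tod = host_tod + epoch
 *	          = (host_tod + delta) + (epoch - delta)
 *
 * i.e. the guest-visible value is the same before and after the sync.
 */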

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc;
}

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}
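
	/*
	 * The PLO query above uses machine (big-endian, MSB-first) bit
	 * numbering: function i lands in byte i / 8, at bit 0x80 >> (i % 8).
	 * For example, if function 3 is available, byte 0 of plo[] gets
	 * 0x80 >> 3 == 0x10 ORed in. The cpacf query masks filled in below
	 * follow the same convention.
	 */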

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	else
		return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	default:
		r = 0;
	}
	return r;
}
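
/*
 * A minimal userspace sketch of probing one of the extensions handled
 * above via KVM_CHECK_EXTENSION on the VM fd (assumes a vm_fd obtained
 * with KVM_CREATE_VM; error handling omitted):
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 *
 * On s390 this yields KVM_S390_BSCA_CPU_SLOTS, KVM_MAX_VCPUS or
 * KVM_S390_ESCA_CPU_SLOTS, depending on SCA entry use and ESCA support,
 * per the switch above.
 */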

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
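
/*
 * Sketch of driving this from userspace (assumes slot 0 was registered
 * with KVM_MEM_LOG_DIRTY_PAGES and "pages" holds its size in 4K pages;
 * names are illustrative, error handling omitted):
 *
 *	struct kvm_dirty_log log = { .slot = 0 };
 *	log.dirty_bitmap = calloc((pages + 63) / 64, 8);
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 *
 * Each set bit marks a guest page touched since the previous call; the
 * kernel-side bitmap is cleared on read, as done above.
 */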

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
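
/*
 * The IBC clamping above bounds the requested instruction blocking
 * control by what SCLP reports. Illustrative numbers (assumed, not from
 * real hardware): with sclp.ibc == 0x0c000d10, lowest_ibc is 0xc00 and
 * unblocked_ibc is 0xd10; a request of 0xf00 is clamped down to 0xd10,
 * a request of 0xb00 is raised to 0xc00, and anything in between is
 * taken as-is.
 */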

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
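
/*
 * Userspace drives the two helpers above through KVM_S390_GET_SKEYS and
 * KVM_S390_SET_SKEYS on the VM fd. A minimal sketch (buffer and fd names
 * are illustrative, error handling omitted):
 *
 *	uint8_t keys[16];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 16,
 *		.skeydata_addr = (uint64_t) keys,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * A return of KVM_S390_GET_SKEYS_NONE means the guest never used storage
 * keys, matching the mm_use_skey() check above.
 */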

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}

static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_MAX_SIZE;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}

bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}

static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}

static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}

/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
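
/*
 * The loop above is the standard seqcount reader pattern: sample the
 * sequence, read the data, and retry if a writer ran concurrently (an
 * odd sequence marks a write in progress, hence the "seq & ~1" on
 * retry). In sketch form, with illustrative names:
 *
 *	do {
 *		seq = raw_read_seqcount(&sc);
 *		snapshot = data;
 *	} while (read_seqcount_retry(&sc, seq & ~1));
 *
 * This lets other VCPU threads read the timer on the hot path without
 * taking a lock.
 */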

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	gmap_enable(vcpu->arch.enabled_gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);
}

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= 0x02;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= 0x04;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= 0x08;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= 0x20;
	vcpu->arch.sie_block->eca = 0x1002000U;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= 0x80000000U;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= 0x40000000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}
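
/*
 * The range check above treats the prefix area as two consecutive 4K
 * pages: with prefix P, any invalidation overlapping
 * [P, P + 2*PAGE_SIZE - 1] triggers KVM_REQ_MMU_RELOAD. For example, a
 * notifier call for a single page at P + PAGE_SIZE still matches, since
 * lowcore accesses can touch either of the two prefix pages.
 */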

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
2316 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2317 KVM_GUESTDBG_USE_HW_BP | \
2318 KVM_GUESTDBG_ENABLE)
2320 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2321 struct kvm_guest_debug *dbg)
2325 vcpu->guest_debug = 0;
2326 kvm_s390_clear_bp_data(vcpu);
2328 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2330 if (!sclp.has_gpere)
2333 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2334 vcpu->guest_debug = dbg->control;
2335 /* enforce guest PER */
2336 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2338 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2339 rc = kvm_s390_import_bp_data(vcpu, dbg);
2341 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2342 vcpu->arch.guestdbg.last_bp = 0;
2346 vcpu->guest_debug = 0;
2347 kvm_s390_clear_bp_data(vcpu);
2348 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
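/*
 * Illustration (hypothetical userspace sketch): enabling guest
 * single-stepping through the handler above. "vcpu_fd" is assumed:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg) < 0)
 *		perror("KVM_SET_GUEST_DEBUG");
 *
 * A later call with control == 0 takes the else branch above, which
 * clears CPUSTAT_P and the imported breakpoint data again.
 */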
2354 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2355 struct kvm_mp_state *mp_state)
2357 /* CHECK_STOP and LOAD are not supported yet */
2358 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2359 KVM_MP_STATE_OPERATING;
2362 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2363 struct kvm_mp_state *mp_state)
2367 /* user space knows about this interface - let it control the state */
2368 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2370 switch (mp_state->mp_state) {
2371 case KVM_MP_STATE_STOPPED:
2372 kvm_s390_vcpu_stop(vcpu);
2374 case KVM_MP_STATE_OPERATING:
2375 kvm_s390_vcpu_start(vcpu);
2377 case KVM_MP_STATE_LOAD:
2378 case KVM_MP_STATE_CHECK_STOP:
2379 /* fall through - CHECK_STOP and LOAD are not supported yet */
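/*
 * Illustration (hypothetical userspace sketch, not part of the original
 * file): stopping a vcpu through the handler above. Note that any
 * KVM_SET_MP_STATE call switches the VM to user controlled CPU state.
 * "vcpu_fd" is an assumed vcpu file descriptor:
 *
 *	struct kvm_mp_state mp_state = {
 *		.mp_state = KVM_MP_STATE_STOPPED,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp_state) < 0)
 *		perror("KVM_SET_MP_STATE");
 */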
2387 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2389 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2392 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2395 kvm_s390_vcpu_request_handled(vcpu);
2396 if (!vcpu->requests)
2399 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2400 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2401 * This ensures that the ipte instruction for this request has
2402 * already finished. We might race against a second unmapper that
* wants to set the blocking bit. Let's just retry the request loop.
2405 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2407 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2408 kvm_s390_get_prefix(vcpu),
2409 PAGE_SIZE * 2, PROT_WRITE);
2411 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2417 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2418 vcpu->arch.sie_block->ihcpu = 0xffff;
2422 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2423 if (!ibs_enabled(vcpu)) {
2424 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2425 atomic_or(CPUSTAT_IBS,
2426 &vcpu->arch.sie_block->cpuflags);
2431 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2432 if (ibs_enabled(vcpu)) {
2433 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2434 atomic_andnot(CPUSTAT_IBS,
2435 &vcpu->arch.sie_block->cpuflags);
2440 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2441 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2445 /* nothing to do, just clear the request */
2446 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2451 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2453 struct kvm_vcpu *vcpu;
2456 mutex_lock(&kvm->lock);
2458 kvm->arch.epoch = tod - get_tod_clock();
2459 kvm_s390_vcpu_block_all(kvm);
2460 kvm_for_each_vcpu(i, vcpu, kvm)
2461 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2462 kvm_s390_vcpu_unblock_all(kvm);
2464 mutex_unlock(&kvm->lock);
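/*
 * Worked example for the epoch computation above (made-up numbers): if
 * userspace requests a guest TOD of 0x2000 while get_tod_clock() reads
 * 0x1800, kvm->arch.epoch becomes 0x0800. SIE presents each guest with
 * host TOD + epoch, so every vcpu observes 0x2000 at that instant and
 * the guest clock keeps advancing in lockstep with the host clock.
 */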
2468 * kvm_arch_fault_in_page - fault-in guest page if necessary
2469 * @vcpu: The corresponding virtual cpu
2470 * @gpa: Guest physical address
2471 * @writable: Whether the page should be writable or not
2473 * Make sure that a guest page has been faulted-in on the host.
2475 * Return: Zero on success, negative error code otherwise.
2477 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2479 return gmap_fault(vcpu->arch.gmap, gpa,
2480 writable ? FAULT_FLAG_WRITE : 0);
2483 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2484 unsigned long token)
2486 struct kvm_s390_interrupt inti;
2487 struct kvm_s390_irq irq;
2490 irq.u.ext.ext_params2 = token;
2491 irq.type = KVM_S390_INT_PFAULT_INIT;
2492 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2494 inti.type = KVM_S390_INT_PFAULT_DONE;
2495 inti.parm64 = token;
2496 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2500 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2501 struct kvm_async_pf *work)
2503 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2504 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2507 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2508 struct kvm_async_pf *work)
2510 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2511 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2514 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2515 struct kvm_async_pf *work)
2517 /* s390 will always inject the page directly */
2520 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2523 * s390 will always inject the page directly,
* but we still want check_async_completion to clean up
2529 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2532 struct kvm_arch_async_pf arch;
2535 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2537 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2538 vcpu->arch.pfault_compare)
2540 if (psw_extint_disabled(vcpu))
2542 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2544 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2546 if (!vcpu->arch.gmap->pfault_enabled)
2549 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2550 hva += current->thread.gmap_addr & ~PAGE_MASK;
2551 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2554 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2558 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2563 * On s390 notifications for arriving pages will be delivered directly
* to the guest but the housekeeping for completed pfaults is
2565 * handled outside the worker.
2567 kvm_check_async_pf_completion(vcpu);
2569 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2570 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2575 if (test_cpu_flag(CIF_MCCK_PENDING))
2578 if (!kvm_is_ucontrol(vcpu->kvm)) {
2579 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2584 rc = kvm_s390_handle_requests(vcpu);
2588 if (guestdbg_enabled(vcpu)) {
2589 kvm_s390_backup_guest_per_regs(vcpu);
2590 kvm_s390_patch_guest_per_regs(vcpu);
2593 vcpu->arch.sie_block->icptcode = 0;
2594 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2595 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2596 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2601 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2603 struct kvm_s390_pgm_info pgm_info = {
2604 .code = PGM_ADDRESSING,
2609 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2610 trace_kvm_s390_sie_fault(vcpu);
2613 * We want to inject an addressing exception, which is defined as a
2614 * suppressing or terminating exception. However, since we came here
2615 * by a DAT access exception, the PSW still points to the faulting
2616 * instruction since DAT exceptions are nullifying. So we've got
2617 * to look up the current opcode to get the length of the instruction
2618 * to be able to forward the PSW.
2620 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
2621 ilen = insn_length(opcode);
2625 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2626 * Forward by arbitrary ilc, injection will take care of
2627 * nullification if necessary.
2629 pgm_info = vcpu->arch.pgm;
2632 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2633 kvm_s390_forward_psw(vcpu, ilen);
2634 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
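/*
 * Worked example (illustrative numbers): if the guest PSW points at a
 * 4-byte instruction at 0x20000, read_guest_instr() fetches the first
 * opcode byte, insn_length() yields ilen == 4, and the PSW is forwarded
 * to 0x20004 before PGM_ADDRESSING is injected -- matching the
 * suppressing semantics described in the comment above.
 */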
2637 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2639 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2640 vcpu->arch.sie_block->icptcode);
2641 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2643 if (guestdbg_enabled(vcpu))
2644 kvm_s390_restore_guest_per_regs(vcpu);
2646 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2647 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2649 if (vcpu->arch.sie_block->icptcode > 0) {
2650 int rc = kvm_handle_sie_intercept(vcpu);
2652 if (rc != -EOPNOTSUPP)
2654 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2655 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2656 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2657 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2659 } else if (exit_reason != -EFAULT) {
2660 vcpu->stat.exit_null++;
2662 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2663 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2664 vcpu->run->s390_ucontrol.trans_exc_code =
2665 current->thread.gmap_addr;
2666 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2668 } else if (current->thread.gmap_pfault) {
2669 trace_kvm_s390_major_guest_pfault(vcpu);
2670 current->thread.gmap_pfault = 0;
2671 if (kvm_arch_setup_async_pf(vcpu))
2673 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2675 return vcpu_post_run_fault_in_sie(vcpu);
2678 static int __vcpu_run(struct kvm_vcpu *vcpu)
2680 int rc, exit_reason;
2683 * We try to hold kvm->srcu during most of vcpu_run (except when run-
2684 * ning the guest), so that memslots (and other stuff) are protected
2686 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2689 rc = vcpu_pre_run(vcpu);
2693 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
* As PF_VCPU will be used in the fault handler, there must be
* no uaccess between guest_enter and guest_exit.
2698 local_irq_disable();
2699 guest_enter_irqoff();
2700 __disable_cpu_timer_accounting(vcpu);
2702 exit_reason = sie64a(vcpu->arch.sie_block,
2703 vcpu->run->s.regs.gprs);
2704 local_irq_disable();
2705 __enable_cpu_timer_accounting(vcpu);
2706 guest_exit_irqoff();
2708 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2710 rc = vcpu_post_run(vcpu, exit_reason);
2711 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2713 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2717 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2719 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2720 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2721 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2722 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2723 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2724 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2725 /* some control register changes require a tlb flush */
2726 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2728 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2729 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2730 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2731 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2732 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2733 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2735 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2736 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2737 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2738 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2739 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2740 kvm_clear_async_pf_completion_queue(vcpu);
2743 * If userspace sets the riccb (e.g. after migration) to a valid state,
2744 * we should enable RI here instead of doing the lazy enablement.
2746 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2747 test_kvm_facility(vcpu->kvm, 64)) {
2748 struct runtime_instr_cb *riccb =
2749 (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2752 vcpu->arch.sie_block->ecb3 |= 0x01;
2754 save_access_regs(vcpu->arch.host_acrs);
2755 restore_access_regs(vcpu->run->s.regs.acrs);
2756 /* save host (userspace) fprs/vrs */
2758 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2759 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
2761 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2763 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2764 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2765 if (test_fp_ctl(current->thread.fpu.fpc))
2766 /* User space provided an invalid FPC, let's clear it */
2767 current->thread.fpu.fpc = 0;
2769 kvm_run->kvm_dirty_regs = 0;
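/*
 * Illustration (hypothetical userspace sketch): feeding sync_regs()
 * above through the synced register block of struct kvm_run. "run" is
 * the assumed mmap()ed kvm_run area of a vcpu, "vcpu_fd" its file
 * descriptor and "new_prefix" an assumed value:
 *
 *	run->s.regs.prefix = new_prefix;
 *	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
 *	ioctl(vcpu_fd, KVM_RUN, 0);
 *
 * Only fields flagged in kvm_dirty_regs are consumed, and the flags are
 * cleared here once the registers have been synced.
 */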
2772 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2774 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2775 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2776 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2777 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2778 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2779 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2780 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2781 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2782 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2783 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2784 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2785 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2786 save_access_regs(vcpu->run->s.regs.acrs);
2787 restore_access_regs(vcpu->arch.host_acrs);
2788 /* Save guest register state */
2790 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2791 /* Restore will be done lazily at return */
2792 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2793 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2797 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2802 if (kvm_run->immediate_exit)
2805 if (guestdbg_exit_pending(vcpu)) {
2806 kvm_s390_prepare_debug_exit(vcpu);
2810 if (vcpu->sigset_active)
2811 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2813 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2814 kvm_s390_vcpu_start(vcpu);
2815 } else if (is_vcpu_stopped(vcpu)) {
pr_err_ratelimited("can't run stopped vcpu %d\n", vcpu->vcpu_id);
2821 sync_regs(vcpu, kvm_run);
2822 enable_cpu_timer_accounting(vcpu);
2825 rc = __vcpu_run(vcpu);
2827 if (signal_pending(current) && !rc) {
2828 kvm_run->exit_reason = KVM_EXIT_INTR;
2832 if (guestdbg_exit_pending(vcpu) && !rc) {
2833 kvm_s390_prepare_debug_exit(vcpu);
2837 if (rc == -EREMOTE) {
2838 /* userspace support is needed, kvm_run has been prepared */
2842 disable_cpu_timer_accounting(vcpu);
2843 store_regs(vcpu, kvm_run);
2845 if (vcpu->sigset_active)
2846 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2848 vcpu->stat.exit_userspace++;
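/*
 * Illustration (hypothetical userspace sketch): a minimal loop driving
 * the run ioctl above. "run" is the assumed mmap()ed kvm_run area,
 * "handle_sieic()" an assumed helper for unhandled SIE intercepts:
 *
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
 *			break;
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			handle_sieic(run);
 *	}
 *
 * KVM_EXIT_INTR and the guest debug exits prepared above reach
 * userspace through run->exit_reason in the same way.
 */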
2853 * store status at address
* we have two special cases:
2855 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2856 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2858 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2860 unsigned char archmode = 1;
2861 freg_t fprs[NUM_FPRS];
2866 px = kvm_s390_get_prefix(vcpu);
2867 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2868 if (write_guest_abs(vcpu, 163, &archmode, 1))
2871 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2872 if (write_guest_real(vcpu, 163, &archmode, 1))
2876 gpa -= __LC_FPREGS_SAVE_AREA;
2878 /* manually convert vector registers if necessary */
2879 if (MACHINE_HAS_VX) {
2880 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2881 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2884 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2885 vcpu->run->s.regs.fprs, 128);
2887 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2888 vcpu->run->s.regs.gprs, 128);
2889 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2890 &vcpu->arch.sie_block->gpsw, 16);
2891 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2893 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2894 &vcpu->run->s.regs.fpc, 4);
2895 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2896 &vcpu->arch.sie_block->todpr, 4);
2897 cputm = kvm_s390_get_cpu_timer(vcpu);
2898 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2900 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2901 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2903 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2904 &vcpu->run->s.regs.acrs, 64);
2905 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2906 &vcpu->arch.sie_block->gcr, 128);
2907 return rc ? -EFAULT : 0;
2910 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2913 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2914 * switch in the run ioctl. Let's update our copies before we save
* them into the save area
2918 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2919 save_access_regs(vcpu->run->s.regs.acrs);
2921 return kvm_s390_store_status_unloaded(vcpu, addr);
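/*
 * Illustration (hypothetical userspace sketch): requesting a store
 * status via the vcpu ioctl handled further below; the argument is an
 * absolute guest address or one of the two special values documented
 * above:
 *
 *	if (ioctl(vcpu_fd, KVM_S390_STORE_STATUS,
 *		  KVM_S390_STORE_STATUS_NOADDR) < 0)
 *		perror("KVM_S390_STORE_STATUS");
 */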
2924 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2926 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2927 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2930 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2933 struct kvm_vcpu *vcpu;
2935 kvm_for_each_vcpu(i, vcpu, kvm) {
2936 __disable_ibs_on_vcpu(vcpu);
2940 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2944 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2945 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2948 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2950 int i, online_vcpus, started_vcpus = 0;
2952 if (!is_vcpu_stopped(vcpu))
2955 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2956 /* Only one cpu at a time may enter/leave the STOPPED state. */
2957 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2958 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2960 for (i = 0; i < online_vcpus; i++) {
2961 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2965 if (started_vcpus == 0) {
2966 /* we're the only active VCPU -> speed it up */
2967 __enable_ibs_on_vcpu(vcpu);
2968 } else if (started_vcpus == 1) {
2970 * As we are starting a second VCPU, we have to disable
2971 * the IBS facility on all VCPUs to remove potentially
* outstanding ENABLE requests.
2974 __disable_ibs_on_all_vcpus(vcpu->kvm);
2977 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2979 * Another VCPU might have used IBS while we were offline.
2980 * Let's play safe and flush the VCPU at startup.
2982 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2983 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2987 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2989 int i, online_vcpus, started_vcpus = 0;
2990 struct kvm_vcpu *started_vcpu = NULL;
2992 if (is_vcpu_stopped(vcpu))
2995 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2996 /* Only one cpu at a time may enter/leave the STOPPED state. */
2997 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2998 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3001 kvm_s390_clear_stop_irq(vcpu);
3003 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3004 __disable_ibs_on_vcpu(vcpu);
3006 for (i = 0; i < online_vcpus; i++) {
3007 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3009 started_vcpu = vcpu->kvm->vcpus[i];
3013 if (started_vcpus == 1) {
3015 * As we only have one VCPU left, we want to enable the
3016 * IBS facility for that VCPU to speed it up.
3018 __enable_ibs_on_vcpu(started_vcpu);
3021 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3025 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3026 struct kvm_enable_cap *cap)
3034 case KVM_CAP_S390_CSS_SUPPORT:
3035 if (!vcpu->kvm->arch.css_support) {
3036 vcpu->kvm->arch.css_support = 1;
3037 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3038 trace_kvm_s390_enable_css(vcpu->kvm);
3049 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3050 struct kvm_s390_mem_op *mop)
3052 void __user *uaddr = (void __user *)mop->buf;
3053 void *tmpbuf = NULL;
3055 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3056 | KVM_S390_MEMOP_F_CHECK_ONLY;
3058 if (mop->flags & ~supported_flags)
3061 if (mop->size > MEM_OP_MAX_SIZE)
3064 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3065 tmpbuf = vmalloc(mop->size);
3070 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3073 case KVM_S390_MEMOP_LOGICAL_READ:
3074 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3075 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3076 mop->size, GACC_FETCH);
3079 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3081 if (copy_to_user(uaddr, tmpbuf, mop->size))
3085 case KVM_S390_MEMOP_LOGICAL_WRITE:
3086 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3087 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3088 mop->size, GACC_STORE);
3091 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3095 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3101 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3103 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3104 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
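/*
 * Illustration (hypothetical userspace sketch): reading 256 bytes of
 * guest logical memory through the handler above. "vcpu_fd" is an
 * assumed vcpu file descriptor and the guest address is made up:
 *
 *	__u8 data[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size  = sizeof(data),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)data,
 *		.ar    = 0,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 *
 * With KVM_S390_MEMOP_F_CHECK_ONLY set in op.flags only the access
 * check is performed and no data is copied.
 */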
3110 long kvm_arch_vcpu_ioctl(struct file *filp,
3111 unsigned int ioctl, unsigned long arg)
3113 struct kvm_vcpu *vcpu = filp->private_data;
3114 void __user *argp = (void __user *)arg;
3119 case KVM_S390_IRQ: {
3120 struct kvm_s390_irq s390irq;
3123 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3125 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3128 case KVM_S390_INTERRUPT: {
3129 struct kvm_s390_interrupt s390int;
3130 struct kvm_s390_irq s390irq;
3133 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3135 if (s390int_to_s390irq(&s390int, &s390irq))
3137 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3140 case KVM_S390_STORE_STATUS:
3141 idx = srcu_read_lock(&vcpu->kvm->srcu);
3142 r = kvm_s390_vcpu_store_status(vcpu, arg);
3143 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3145 case KVM_S390_SET_INITIAL_PSW: {
3149 if (copy_from_user(&psw, argp, sizeof(psw)))
3151 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3154 case KVM_S390_INITIAL_RESET:
3155 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3157 case KVM_SET_ONE_REG:
3158 case KVM_GET_ONE_REG: {
3159 struct kvm_one_reg reg;
if (copy_from_user(&reg, argp, sizeof(reg)))
3163 if (ioctl == KVM_SET_ONE_REG)
r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3169 #ifdef CONFIG_KVM_S390_UCONTROL
3170 case KVM_S390_UCAS_MAP: {
3171 struct kvm_s390_ucas_mapping ucasmap;
3173 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3178 if (!kvm_is_ucontrol(vcpu->kvm)) {
3183 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3184 ucasmap.vcpu_addr, ucasmap.length);
3187 case KVM_S390_UCAS_UNMAP: {
3188 struct kvm_s390_ucas_mapping ucasmap;
3190 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3195 if (!kvm_is_ucontrol(vcpu->kvm)) {
3200 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3205 case KVM_S390_VCPU_FAULT: {
3206 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3209 case KVM_ENABLE_CAP:
3211 struct kvm_enable_cap cap;
3213 if (copy_from_user(&cap, argp, sizeof(cap)))
3215 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3218 case KVM_S390_MEM_OP: {
3219 struct kvm_s390_mem_op mem_op;
3221 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3222 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3227 case KVM_S390_SET_IRQ_STATE: {
3228 struct kvm_s390_irq_state irq_state;
3231 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3233 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3234 irq_state.len == 0 ||
3235 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3239 r = kvm_s390_set_irq_state(vcpu,
3240 (void __user *) irq_state.buf,
3244 case KVM_S390_GET_IRQ_STATE: {
3245 struct kvm_s390_irq_state irq_state;
3248 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3250 if (irq_state.len == 0) {
3254 r = kvm_s390_get_irq_state(vcpu,
3255 (__u8 __user *) irq_state.buf,
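/*
 * Illustration (hypothetical userspace sketch): injecting an emergency
 * signal through the KVM_S390_IRQ case above. "vcpu_fd" is assumed and
 * the emergency code (sending CPU address) is illustrative:
 *
 *	struct kvm_s390_irq irq = {
 *		.type = KVM_S390_INT_EMERGENCY,
 *		.u.emerg.code = 1,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_IRQ, &irq) < 0)
 *		perror("KVM_S390_IRQ");
 *
 * The older KVM_S390_INTERRUPT case above performs the same injection
 * after converting a struct kvm_s390_interrupt via s390int_to_s390irq().
 */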
3265 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3267 #ifdef CONFIG_KVM_S390_UCONTROL
3268 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3269 && (kvm_is_ucontrol(vcpu->kvm))) {
3270 vmf->page = virt_to_page(vcpu->arch.sie_block);
3271 get_page(vmf->page);
3275 return VM_FAULT_SIGBUS;
3278 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3279 unsigned long npages)
3284 /* Section: memory related */
3285 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3286 struct kvm_memory_slot *memslot,
3287 const struct kvm_userspace_memory_region *mem,
3288 enum kvm_mr_change change)
/* A few sanity checks. Memory slots have to start and end on a segment
boundary (1MB). The memory in userland may be fragmented into various
different vmas. It is okay to mmap() and munmap() stuff in this slot
after doing this call at any time */
3295 if (mem->userspace_addr & 0xffffful)
3298 if (mem->memory_size & 0xffffful)
3301 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3307 void kvm_arch_commit_memory_region(struct kvm *kvm,
3308 const struct kvm_userspace_memory_region *mem,
3309 const struct kvm_memory_slot *old,
3310 const struct kvm_memory_slot *new,
3311 enum kvm_mr_change change)
3315 /* If the basics of the memslot do not change, we do not want
3316 * to update the gmap. Every update causes several unnecessary
3317 * segment translation exceptions. This is usually handled just
3318 * fine by the normal fault handler + gmap, but it will also
3319 * cause faults on the prefix page of running guest CPUs.
3321 if (old->userspace_addr == mem->userspace_addr &&
3322 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3323 old->npages * PAGE_SIZE == mem->memory_size)
3326 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3327 mem->guest_phys_addr, mem->memory_size);
3329 pr_warn("failed to commit memory region\n");
3333 static inline unsigned long nonhyp_mask(int i)
3335 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3337 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
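/*
 * Worked example for nonhyp_mask() (the arithmetic follows from the
 * code; the reading of sclp.hmfai as sixteen 2-bit fields is an
 * assumption): "(sclp.hmfai << i * 2) >> 30" isolates field i, so
 * nonhyp_fai is 0..3. For nonhyp_fai == 1 the result is
 * 0x0000ffffffffffffUL >> 16 == 0x00000000ffffffffUL, so only the
 * low-order 32 bits of that facility doubleword survive the AND in
 * kvm_s390_init() below.
 */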
3340 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3342 vcpu->valid_wakeup = false;
3345 static int __init kvm_s390_init(void)
3349 if (!sclp.has_sief2) {
3350 pr_info("SIE not available\n");
3354 for (i = 0; i < 16; i++)
3355 kvm_s390_fac_list_mask[i] |=
3356 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3358 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3361 static void __exit kvm_s390_exit(void)
3366 module_init(kvm_s390_init);
3367 module_exit(kvm_s390_exit);
3370 * Enable autoloading of the kvm module.
3371 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3372 * since x86 takes a different approach.
3374 #include <linux/miscdevice.h>
3375 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3376 MODULE_ALIAS("devname:kvm");