2 * hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008, 2009
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
17 #include <linux/compiler.h>
18 #include <linux/err.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
34 #include <asm/asm-offsets.h>
35 #include <asm/lowcore.h>
37 #include <asm/pgtable.h>
40 #include <asm/switch_to.h>
43 #include <asm/cpacf.h>
44 #include <asm/timex.h>
48 #define KMSG_COMPONENT "kvm-s390"
50 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52 #define CREATE_TRACE_POINTS
54 #include "trace-s390.h"
56 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59 (KVM_MAX_VCPUS + LOCAL_IRQS))
61 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63 struct kvm_stats_debugfs_item debugfs_entries[] = {
64 { "userspace_handled", VCPU_STAT(exit_userspace) },
65 { "exit_null", VCPU_STAT(exit_null) },
66 { "exit_validity", VCPU_STAT(exit_validity) },
67 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
68 { "exit_external_request", VCPU_STAT(exit_external_request) },
69 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70 { "exit_instruction", VCPU_STAT(exit_instruction) },
71 { "exit_pei", VCPU_STAT(exit_pei) },
72 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
79 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
80 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
81 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
82 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
83 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
84 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
85 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
86 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
87 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
88 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
89 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
90 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
91 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
92 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
93 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
94 { "instruction_spx", VCPU_STAT(instruction_spx) },
95 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
96 { "instruction_stap", VCPU_STAT(instruction_stap) },
97 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
98 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
99 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
100 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
101 { "instruction_essa", VCPU_STAT(instruction_essa) },
102 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
103 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
104 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
105 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
106 { "instruction_sie", VCPU_STAT(instruction_sie) },
107 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
108 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
109 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
110 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
111 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
112 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
113 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
114 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
115 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
116 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
117 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
118 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
119 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
120 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
121 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
122 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
123 { "diagnose_10", VCPU_STAT(diagnose_10) },
124 { "diagnose_44", VCPU_STAT(diagnose_44) },
125 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
126 { "diagnose_258", VCPU_STAT(diagnose_258) },
127 { "diagnose_308", VCPU_STAT(diagnose_308) },
128 { "diagnose_500", VCPU_STAT(diagnose_500) },
132 /* allow nested virtualization in KVM (if enabled by user space) */
134 module_param(nested, int, S_IRUGO);
135 MODULE_PARM_DESC(nested, "Nested virtualization support");
137 /* upper facilities limit for kvm */
138 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
140 unsigned long kvm_s390_fac_list_mask_size(void)
142 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
143 return ARRAY_SIZE(kvm_s390_fac_list_mask);
146 /* available cpu features supported by kvm */
147 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
148 /* available subfunctions indicated via query / "test bit" */
149 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
151 static struct gmap_notifier gmap_notifier;
152 static struct gmap_notifier vsie_gmap_notifier;
153 debug_info_t *kvm_s390_dbf;
155 /* Section: not file related */
/*
 * Nothing to do to enable virtualization on s390: SIE is always
 * available when the kernel runs. Always reports success.
 */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}
162 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
166 * This callback is executed during stop_machine(). All CPUs are therefore
167 * temporarily stopped. In order not to change guest behavior, we have to
168 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
169 * so a CPU won't be stopped while calculating with the epoch.
171 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
175 struct kvm_vcpu *vcpu;
177 unsigned long long *delta = v;
179 list_for_each_entry(kvm, &vm_list, vm_list) {
180 kvm->arch.epoch -= *delta;
181 kvm_for_each_vcpu(i, vcpu, kvm) {
182 vcpu->arch.sie_block->epoch -= *delta;
183 if (vcpu->arch.cputm_enabled)
184 vcpu->arch.cputm_start += *delta;
185 if (vcpu->arch.vsie_block)
186 vcpu->arch.vsie_block->epoch -= *delta;
192 static struct notifier_block kvm_clock_notifier = {
193 .notifier_call = kvm_clock_sync,
196 int kvm_arch_hardware_setup(void)
198 gmap_notifier.notifier_call = kvm_gmap_notifier;
199 gmap_register_pte_notifier(&gmap_notifier);
200 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
201 gmap_register_pte_notifier(&vsie_gmap_notifier);
202 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
203 &kvm_clock_notifier);
207 void kvm_arch_hardware_unsetup(void)
209 gmap_unregister_pte_notifier(&gmap_notifier);
210 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
211 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
212 &kvm_clock_notifier);
215 static void allow_cpu_feat(unsigned long nr)
217 set_bit_inv(nr, kvm_s390_available_cpu_feat);
220 static inline int plo_test_bit(unsigned char nr)
222 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
226 /* Parameter registers are ignored for "test bit" */
236 static void kvm_s390_cpu_feat_init(void)
240 for (i = 0; i < 256; ++i) {
242 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
245 if (test_facility(28)) /* TOD-clock steering */
246 ptff(kvm_s390_available_subfunc.ptff,
247 sizeof(kvm_s390_available_subfunc.ptff),
250 if (test_facility(17)) { /* MSA */
251 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
252 kvm_s390_available_subfunc.kmac);
253 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
254 kvm_s390_available_subfunc.kmc);
255 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
256 kvm_s390_available_subfunc.km);
257 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
258 kvm_s390_available_subfunc.kimd);
259 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
260 kvm_s390_available_subfunc.klmd);
262 if (test_facility(76)) /* MSA3 */
263 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
264 kvm_s390_available_subfunc.pckmo);
265 if (test_facility(77)) { /* MSA4 */
266 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
267 kvm_s390_available_subfunc.kmctr);
268 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
269 kvm_s390_available_subfunc.kmf);
270 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
271 kvm_s390_available_subfunc.kmo);
272 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
273 kvm_s390_available_subfunc.pcc);
275 if (test_facility(57)) /* MSA5 */
276 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
277 kvm_s390_available_subfunc.ppno);
279 if (test_facility(146)) /* MSA8 */
280 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
281 kvm_s390_available_subfunc.kma);
283 if (MACHINE_HAS_ESOP)
284 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
286 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
287 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
289 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
290 !test_facility(3) || !nested)
292 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
293 if (sclp.has_64bscao)
294 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
296 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
298 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
300 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
302 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
304 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
306 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
308 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
310 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
311 * all skey handling functions read/set the skey from the PGSTE
312 * instead of the real storage key.
314 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
315 * pages being detected as preserved although they are resident.
317 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
318 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
320 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
321 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
322 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
324 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
325 * cannot easily shadow the SCA because of the ipte lock.
329 int kvm_arch_init(void *opaque)
331 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
335 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
336 debug_unregister(kvm_s390_dbf);
340 kvm_s390_cpu_feat_init();
342 /* Register floating interrupt controller interface. */
343 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
346 void kvm_arch_exit(void)
348 debug_unregister(kvm_s390_dbf);
351 /* Section: device related */
352 long kvm_arch_dev_ioctl(struct file *filp,
353 unsigned int ioctl, unsigned long arg)
355 if (ioctl == KVM_S390_ENABLE_SIE)
356 return s390_enable_sie();
360 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
365 case KVM_CAP_S390_PSW:
366 case KVM_CAP_S390_GMAP:
367 case KVM_CAP_SYNC_MMU:
368 #ifdef CONFIG_KVM_S390_UCONTROL
369 case KVM_CAP_S390_UCONTROL:
371 case KVM_CAP_ASYNC_PF:
372 case KVM_CAP_SYNC_REGS:
373 case KVM_CAP_ONE_REG:
374 case KVM_CAP_ENABLE_CAP:
375 case KVM_CAP_S390_CSS_SUPPORT:
376 case KVM_CAP_IOEVENTFD:
377 case KVM_CAP_DEVICE_CTRL:
378 case KVM_CAP_ENABLE_CAP_VM:
379 case KVM_CAP_S390_IRQCHIP:
380 case KVM_CAP_VM_ATTRIBUTES:
381 case KVM_CAP_MP_STATE:
382 case KVM_CAP_IMMEDIATE_EXIT:
383 case KVM_CAP_S390_INJECT_IRQ:
384 case KVM_CAP_S390_USER_SIGP:
385 case KVM_CAP_S390_USER_STSI:
386 case KVM_CAP_S390_SKEYS:
387 case KVM_CAP_S390_IRQ_STATE:
388 case KVM_CAP_S390_USER_INSTR0:
389 case KVM_CAP_S390_AIS:
392 case KVM_CAP_S390_MEM_OP:
395 case KVM_CAP_NR_VCPUS:
396 case KVM_CAP_MAX_VCPUS:
397 r = KVM_S390_BSCA_CPU_SLOTS;
398 if (!kvm_s390_use_sca_entries())
400 else if (sclp.has_esca && sclp.has_64bscao)
401 r = KVM_S390_ESCA_CPU_SLOTS;
403 case KVM_CAP_NR_MEMSLOTS:
404 r = KVM_USER_MEM_SLOTS;
406 case KVM_CAP_S390_COW:
407 r = MACHINE_HAS_ESOP;
409 case KVM_CAP_S390_VECTOR_REGISTERS:
412 case KVM_CAP_S390_RI:
413 r = test_facility(64);
415 case KVM_CAP_S390_GS:
416 r = test_facility(133);
424 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
425 struct kvm_memory_slot *memslot)
427 gfn_t cur_gfn, last_gfn;
428 unsigned long address;
429 struct gmap *gmap = kvm->arch.gmap;
431 /* Loop over all guest pages */
432 last_gfn = memslot->base_gfn + memslot->npages;
433 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
434 address = gfn_to_hva_memslot(memslot, cur_gfn);
436 if (test_and_clear_guest_dirty(gmap->mm, address))
437 mark_page_dirty(kvm, cur_gfn);
438 if (fatal_signal_pending(current))
444 /* Section: vm related */
445 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
448 * Get (and clear) the dirty memory log for a memory slot.
450 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
451 struct kvm_dirty_log *log)
455 struct kvm_memslots *slots;
456 struct kvm_memory_slot *memslot;
459 if (kvm_is_ucontrol(kvm))
462 mutex_lock(&kvm->slots_lock);
465 if (log->slot >= KVM_USER_MEM_SLOTS)
468 slots = kvm_memslots(kvm);
469 memslot = id_to_memslot(slots, log->slot);
471 if (!memslot->dirty_bitmap)
474 kvm_s390_sync_dirty_log(kvm, memslot);
475 r = kvm_get_dirty_log(kvm, log, &is_dirty);
479 /* Clear the dirty log */
481 n = kvm_dirty_bitmap_bytes(memslot);
482 memset(memslot->dirty_bitmap, 0, n);
486 mutex_unlock(&kvm->slots_lock);
490 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
493 struct kvm_vcpu *vcpu;
495 kvm_for_each_vcpu(i, vcpu, kvm) {
496 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
500 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
508 case KVM_CAP_S390_IRQCHIP:
509 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
510 kvm->arch.use_irqchip = 1;
513 case KVM_CAP_S390_USER_SIGP:
514 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
515 kvm->arch.user_sigp = 1;
518 case KVM_CAP_S390_VECTOR_REGISTERS:
519 mutex_lock(&kvm->lock);
520 if (kvm->created_vcpus) {
522 } else if (MACHINE_HAS_VX) {
523 set_kvm_facility(kvm->arch.model.fac_mask, 129);
524 set_kvm_facility(kvm->arch.model.fac_list, 129);
525 if (test_facility(134)) {
526 set_kvm_facility(kvm->arch.model.fac_mask, 134);
527 set_kvm_facility(kvm->arch.model.fac_list, 134);
529 if (test_facility(135)) {
530 set_kvm_facility(kvm->arch.model.fac_mask, 135);
531 set_kvm_facility(kvm->arch.model.fac_list, 135);
536 mutex_unlock(&kvm->lock);
537 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
538 r ? "(not available)" : "(success)");
540 case KVM_CAP_S390_RI:
542 mutex_lock(&kvm->lock);
543 if (kvm->created_vcpus) {
545 } else if (test_facility(64)) {
546 set_kvm_facility(kvm->arch.model.fac_mask, 64);
547 set_kvm_facility(kvm->arch.model.fac_list, 64);
550 mutex_unlock(&kvm->lock);
551 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
552 r ? "(not available)" : "(success)");
554 case KVM_CAP_S390_AIS:
555 mutex_lock(&kvm->lock);
556 if (kvm->created_vcpus) {
559 set_kvm_facility(kvm->arch.model.fac_mask, 72);
560 set_kvm_facility(kvm->arch.model.fac_list, 72);
561 kvm->arch.float_int.ais_enabled = 1;
564 mutex_unlock(&kvm->lock);
565 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
566 r ? "(not available)" : "(success)");
568 case KVM_CAP_S390_GS:
570 mutex_lock(&kvm->lock);
571 if (atomic_read(&kvm->online_vcpus)) {
573 } else if (test_facility(133)) {
574 set_kvm_facility(kvm->arch.model.fac_mask, 133);
575 set_kvm_facility(kvm->arch.model.fac_list, 133);
578 mutex_unlock(&kvm->lock);
579 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
580 r ? "(not available)" : "(success)");
582 case KVM_CAP_S390_USER_STSI:
583 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
584 kvm->arch.user_stsi = 1;
587 case KVM_CAP_S390_USER_INSTR0:
588 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
589 kvm->arch.user_instr0 = 1;
590 icpt_operexc_on_all_vcpus(kvm);
600 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
604 switch (attr->attr) {
605 case KVM_S390_VM_MEM_LIMIT_SIZE:
607 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
608 kvm->arch.mem_limit);
609 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
619 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
623 switch (attr->attr) {
624 case KVM_S390_VM_MEM_ENABLE_CMMA:
630 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
631 mutex_lock(&kvm->lock);
632 if (!kvm->created_vcpus) {
633 kvm->arch.use_cmma = 1;
636 mutex_unlock(&kvm->lock);
638 case KVM_S390_VM_MEM_CLR_CMMA:
643 if (!kvm->arch.use_cmma)
646 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
647 mutex_lock(&kvm->lock);
648 idx = srcu_read_lock(&kvm->srcu);
649 s390_reset_cmma(kvm->arch.gmap->mm);
650 srcu_read_unlock(&kvm->srcu, idx);
651 mutex_unlock(&kvm->lock);
654 case KVM_S390_VM_MEM_LIMIT_SIZE: {
655 unsigned long new_limit;
657 if (kvm_is_ucontrol(kvm))
660 if (get_user(new_limit, (u64 __user *)attr->addr))
663 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
664 new_limit > kvm->arch.mem_limit)
670 /* gmap_create takes last usable address */
671 if (new_limit != KVM_S390_NO_MEM_LIMIT)
675 mutex_lock(&kvm->lock);
676 if (!kvm->created_vcpus) {
677 /* gmap_create will round the limit up */
678 struct gmap *new = gmap_create(current->mm, new_limit);
683 gmap_remove(kvm->arch.gmap);
685 kvm->arch.gmap = new;
689 mutex_unlock(&kvm->lock);
690 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
691 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
692 (void *) kvm->arch.gmap->asce);
702 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
704 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
706 struct kvm_vcpu *vcpu;
709 if (!test_kvm_facility(kvm, 76))
712 mutex_lock(&kvm->lock);
713 switch (attr->attr) {
714 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
716 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
717 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
718 kvm->arch.crypto.aes_kw = 1;
719 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
721 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
723 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
724 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
725 kvm->arch.crypto.dea_kw = 1;
726 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
728 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
729 kvm->arch.crypto.aes_kw = 0;
730 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
731 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
732 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
734 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
735 kvm->arch.crypto.dea_kw = 0;
736 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
737 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
738 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
741 mutex_unlock(&kvm->lock);
745 kvm_for_each_vcpu(i, vcpu, kvm) {
746 kvm_s390_vcpu_crypto_setup(vcpu);
749 mutex_unlock(&kvm->lock);
753 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
757 if (copy_from_user(>od_high, (void __user *)attr->addr,
763 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
768 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
772 if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod)))
775 kvm_s390_set_tod_clock(kvm, gtod);
776 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
780 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
787 switch (attr->attr) {
788 case KVM_S390_VM_TOD_HIGH:
789 ret = kvm_s390_set_tod_high(kvm, attr);
791 case KVM_S390_VM_TOD_LOW:
792 ret = kvm_s390_set_tod_low(kvm, attr);
801 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
805 if (copy_to_user((void __user *)attr->addr, >od_high,
808 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
813 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
817 gtod = kvm_s390_get_tod_clock_fast(kvm);
818 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
820 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
825 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
832 switch (attr->attr) {
833 case KVM_S390_VM_TOD_HIGH:
834 ret = kvm_s390_get_tod_high(kvm, attr);
836 case KVM_S390_VM_TOD_LOW:
837 ret = kvm_s390_get_tod_low(kvm, attr);
846 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
848 struct kvm_s390_vm_cpu_processor *proc;
849 u16 lowest_ibc, unblocked_ibc;
852 mutex_lock(&kvm->lock);
853 if (kvm->created_vcpus) {
857 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
862 if (!copy_from_user(proc, (void __user *)attr->addr,
864 kvm->arch.model.cpuid = proc->cpuid;
865 lowest_ibc = sclp.ibc >> 16 & 0xfff;
866 unblocked_ibc = sclp.ibc & 0xfff;
867 if (lowest_ibc && proc->ibc) {
868 if (proc->ibc > unblocked_ibc)
869 kvm->arch.model.ibc = unblocked_ibc;
870 else if (proc->ibc < lowest_ibc)
871 kvm->arch.model.ibc = lowest_ibc;
873 kvm->arch.model.ibc = proc->ibc;
875 memcpy(kvm->arch.model.fac_list, proc->fac_list,
876 S390_ARCH_FAC_LIST_SIZE_BYTE);
877 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
879 kvm->arch.model.cpuid);
880 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
881 kvm->arch.model.fac_list[0],
882 kvm->arch.model.fac_list[1],
883 kvm->arch.model.fac_list[2]);
888 mutex_unlock(&kvm->lock);
892 static int kvm_s390_set_processor_feat(struct kvm *kvm,
893 struct kvm_device_attr *attr)
895 struct kvm_s390_vm_cpu_feat data;
898 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
900 if (!bitmap_subset((unsigned long *) data.feat,
901 kvm_s390_available_cpu_feat,
902 KVM_S390_VM_CPU_FEAT_NR_BITS))
905 mutex_lock(&kvm->lock);
906 if (!atomic_read(&kvm->online_vcpus)) {
907 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
908 KVM_S390_VM_CPU_FEAT_NR_BITS);
911 mutex_unlock(&kvm->lock);
915 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
916 struct kvm_device_attr *attr)
919 * Once supported by kernel + hw, we have to store the subfunctions
920 * in kvm->arch and remember that user space configured them.
925 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
929 switch (attr->attr) {
930 case KVM_S390_VM_CPU_PROCESSOR:
931 ret = kvm_s390_set_processor(kvm, attr);
933 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
934 ret = kvm_s390_set_processor_feat(kvm, attr);
936 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
937 ret = kvm_s390_set_processor_subfunc(kvm, attr);
943 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
945 struct kvm_s390_vm_cpu_processor *proc;
948 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
953 proc->cpuid = kvm->arch.model.cpuid;
954 proc->ibc = kvm->arch.model.ibc;
955 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
956 S390_ARCH_FAC_LIST_SIZE_BYTE);
957 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
959 kvm->arch.model.cpuid);
960 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
961 kvm->arch.model.fac_list[0],
962 kvm->arch.model.fac_list[1],
963 kvm->arch.model.fac_list[2]);
964 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
971 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
973 struct kvm_s390_vm_cpu_machine *mach;
976 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
981 get_cpu_id((struct cpuid *) &mach->cpuid);
982 mach->ibc = sclp.ibc;
983 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
984 S390_ARCH_FAC_LIST_SIZE_BYTE);
985 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
986 sizeof(S390_lowcore.stfle_fac_list));
987 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
989 kvm->arch.model.cpuid);
990 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
994 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
998 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1005 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1006 struct kvm_device_attr *attr)
1008 struct kvm_s390_vm_cpu_feat data;
1010 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1011 KVM_S390_VM_CPU_FEAT_NR_BITS);
1012 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1017 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1018 struct kvm_device_attr *attr)
1020 struct kvm_s390_vm_cpu_feat data;
1022 bitmap_copy((unsigned long *) data.feat,
1023 kvm_s390_available_cpu_feat,
1024 KVM_S390_VM_CPU_FEAT_NR_BITS);
1025 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1030 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1031 struct kvm_device_attr *attr)
1034 * Once we can actually configure subfunctions (kernel + hw support),
1035 * we have to check if they were already set by user space, if so copy
1036 * them from kvm->arch.
1041 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1042 struct kvm_device_attr *attr)
1044 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1045 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1049 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1053 switch (attr->attr) {
1054 case KVM_S390_VM_CPU_PROCESSOR:
1055 ret = kvm_s390_get_processor(kvm, attr);
1057 case KVM_S390_VM_CPU_MACHINE:
1058 ret = kvm_s390_get_machine(kvm, attr);
1060 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1061 ret = kvm_s390_get_processor_feat(kvm, attr);
1063 case KVM_S390_VM_CPU_MACHINE_FEAT:
1064 ret = kvm_s390_get_machine_feat(kvm, attr);
1066 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1067 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1069 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1070 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1076 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1080 switch (attr->group) {
1081 case KVM_S390_VM_MEM_CTRL:
1082 ret = kvm_s390_set_mem_control(kvm, attr);
1084 case KVM_S390_VM_TOD:
1085 ret = kvm_s390_set_tod(kvm, attr);
1087 case KVM_S390_VM_CPU_MODEL:
1088 ret = kvm_s390_set_cpu_model(kvm, attr);
1090 case KVM_S390_VM_CRYPTO:
1091 ret = kvm_s390_vm_set_crypto(kvm, attr);
1101 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1105 switch (attr->group) {
1106 case KVM_S390_VM_MEM_CTRL:
1107 ret = kvm_s390_get_mem_control(kvm, attr);
1109 case KVM_S390_VM_TOD:
1110 ret = kvm_s390_get_tod(kvm, attr);
1112 case KVM_S390_VM_CPU_MODEL:
1113 ret = kvm_s390_get_cpu_model(kvm, attr);
1123 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1127 switch (attr->group) {
1128 case KVM_S390_VM_MEM_CTRL:
1129 switch (attr->attr) {
1130 case KVM_S390_VM_MEM_ENABLE_CMMA:
1131 case KVM_S390_VM_MEM_CLR_CMMA:
1132 ret = sclp.has_cmma ? 0 : -ENXIO;
1134 case KVM_S390_VM_MEM_LIMIT_SIZE:
1142 case KVM_S390_VM_TOD:
1143 switch (attr->attr) {
1144 case KVM_S390_VM_TOD_LOW:
1145 case KVM_S390_VM_TOD_HIGH:
1153 case KVM_S390_VM_CPU_MODEL:
1154 switch (attr->attr) {
1155 case KVM_S390_VM_CPU_PROCESSOR:
1156 case KVM_S390_VM_CPU_MACHINE:
1157 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1158 case KVM_S390_VM_CPU_MACHINE_FEAT:
1159 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1162 /* configuring subfunctions is not supported yet */
1163 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1169 case KVM_S390_VM_CRYPTO:
1170 switch (attr->attr) {
1171 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1172 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1173 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1174 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1190 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1196 if (args->flags != 0)
1199 /* Is this guest using storage keys? */
1200 if (!mm_use_skey(current->mm))
1201 return KVM_S390_GET_SKEYS_NONE;
1203 /* Enforce sane limit on memory allocation */
1204 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1207 keys = kmalloc_array(args->count, sizeof(uint8_t),
1208 GFP_KERNEL | __GFP_NOWARN);
1210 keys = vmalloc(sizeof(uint8_t) * args->count);
1214 down_read(¤t->mm->mmap_sem);
1215 for (i = 0; i < args->count; i++) {
1216 hva = gfn_to_hva(kvm, args->start_gfn + i);
1217 if (kvm_is_error_hva(hva)) {
1222 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1226 up_read(¤t->mm->mmap_sem);
1229 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1230 sizeof(uint8_t) * args->count);
1239 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1245 if (args->flags != 0)
1248 /* Enforce sane limit on memory allocation */
1249 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1252 keys = kmalloc_array(args->count, sizeof(uint8_t),
1253 GFP_KERNEL | __GFP_NOWARN);
1255 keys = vmalloc(sizeof(uint8_t) * args->count);
1259 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1260 sizeof(uint8_t) * args->count);
1266 /* Enable storage key handling for the guest */
1267 r = s390_enable_skey();
1271 down_read(¤t->mm->mmap_sem);
1272 for (i = 0; i < args->count; i++) {
1273 hva = gfn_to_hva(kvm, args->start_gfn + i);
1274 if (kvm_is_error_hva(hva)) {
1279 /* Lowest order bit is reserved */
1280 if (keys[i] & 0x01) {
1285 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1289 up_read(¤t->mm->mmap_sem);
/*
 * kvm_arch_vm_ioctl - dispatch VM-scoped ioctls.
 * Handles VM interrupt injection, capability enabling, dummy IRQ-routing
 * setup for the in-kernel irqchip, device-attribute set/get/has, and
 * guest storage-key get/set.  Every handler first copies its argument
 * structure in from user space.
 * NOTE(review): this extract elides lines from the original file (the
 * switch statement, braces, error returns and break statements are
 * missing); the code below is intentionally left byte-identical.
 */
1295 long kvm_arch_vm_ioctl(struct file *filp,
1296 unsigned int ioctl, unsigned long arg)
1298 struct kvm *kvm = filp->private_data;
1299 void __user *argp = (void __user *)arg;
1300 struct kvm_device_attr attr;
1304 case KVM_S390_INTERRUPT: {
1305 struct kvm_s390_interrupt s390int;
1308 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1310 r = kvm_s390_inject_vm(kvm, &s390int);
1313 case KVM_ENABLE_CAP: {
1314 struct kvm_enable_cap cap;
1316 if (copy_from_user(&cap, argp, sizeof(cap)))
1318 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1321 case KVM_CREATE_IRQCHIP: {
1322 struct kvm_irq_routing_entry routing;
1325 if (kvm->arch.use_irqchip) {
1326 /* Set up dummy routing. */
1327 memset(&routing, 0, sizeof(routing));
1328 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1332 case KVM_SET_DEVICE_ATTR: {
1334 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1336 r = kvm_s390_vm_set_attr(kvm, &attr);
1339 case KVM_GET_DEVICE_ATTR: {
1341 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1343 r = kvm_s390_vm_get_attr(kvm, &attr);
1346 case KVM_HAS_DEVICE_ATTR: {
1348 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1350 r = kvm_s390_vm_has_attr(kvm, &attr);
1353 case KVM_S390_GET_SKEYS: {
1354 struct kvm_s390_skeys args;
1357 if (copy_from_user(&args, argp,
1358 sizeof(struct kvm_s390_skeys)))
1360 r = kvm_s390_get_skeys(kvm, &args);
1363 case KVM_S390_SET_SKEYS: {
1364 struct kvm_s390_skeys args;
1367 if (copy_from_user(&args, argp,
1368 sizeof(struct kvm_s390_skeys)))
1370 r = kvm_s390_set_skeys(kvm, &args);
/*
 * Query the Adjunct Processor (crypto) configuration via the PQAP(QCI)
 * instruction, filling the caller's 128-byte config buffer.  The
 * function code 0x04000000 selects the QCI subfunction.
 */
1380 static int kvm_s390_query_ap_config(u8 *config)
1382 u32 fcn_code = 0x04000000UL;
1385 memset(config, 0, 128);
1389 ".long 0xb2af0000\n" /* PQAP(QCI) */
1395 : "r" (fcn_code), "r" (config)
1396 : "cc", "0", "2", "memory"
/*
 * Return non-zero when the AP extended addressing (APXA) facility is
 * installed: requires facility 12 (AP query configuration) and then
 * tests bit 0x40 of the first QCI config byte.
 */
1402 static int kvm_s390_apxa_installed(void)
1407 if (test_facility(12)) {
1408 cc = kvm_s390_query_ap_config(config);
1411 pr_err("PQAP(QCI) failed with cc=%d", cc);
1413 return config[0] & 0x40;
/*
 * Set the crypto control block descriptor: base address of the CRYCB
 * plus format 2 when APXA is available, format 1 otherwise.
 */
1419 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1421 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1423 if (kvm_s390_apxa_installed())
1424 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1426 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
/*
 * Build the initial guest CPU id; the version byte is forced to 0xff
 * and the result is returned as a raw u64.
 */
1429 static u64 kvm_s390_get_initial_cpuid(void)
1434 cpuid.version = 0xff;
1435 return *((u64 *) &cpuid);
/*
 * Initialize per-VM crypto state: requires facility 76 (MSA extension 3);
 * points the CRYCB at the sie_page2 copy, chooses its format, enables
 * AES/DEA protected-key wrapping by default and generates random
 * wrapping key masks.
 */
1438 static void kvm_s390_crypto_init(struct kvm *kvm)
1440 if (!test_kvm_facility(kvm, 76))
1443 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1444 kvm_s390_set_crycb_format(kvm);
1446 /* Enable AES/DEA protected key functions by default */
1447 kvm->arch.crypto.aes_kw = 1;
1448 kvm->arch.crypto.dea_kw = 1;
1449 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1450 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1451 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1452 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
/*
 * Free the system control area, using the matching release function for
 * the extended (ESCA) vs. basic (BSCA) allocation, and clear the pointer
 * to guard against double free.
 */
1455 static void sca_dispose(struct kvm *kvm)
1457 if (kvm->arch.use_esca)
1458 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1460 free_page((unsigned long)(kvm->arch.sca));
1461 kvm->arch.sca = NULL;
/*
 * kvm_arch_init_vm - architecture part of VM creation.
 * Validates the (ucontrol) type, enables SIE for the host mm, allocates
 * the basic SCA (staggered within the page via sca_offset to spread
 * cache usage), the debug feature area and sie_page2, populates the
 * facility mask/list, initializes crypto, floating-interrupt and AIS
 * state, and creates the guest address space (gmap) unless this is a
 * user-controlled VM.
 * NOTE(review): error-path labels and several statements are elided in
 * this extract; code left byte-identical.
 */
1464 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1466 gfp_t alloc_flags = GFP_KERNEL;
1468 char debug_name[16];
1469 static unsigned long sca_offset;
1472 #ifdef CONFIG_KVM_S390_UCONTROL
1473 if (type & ~KVM_VM_S390_UCONTROL)
1475 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1482 rc = s390_enable_sie();
1488 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1490 kvm->arch.use_esca = 0; /* start with basic SCA */
1491 if (!sclp.has_64bscao)
1492 alloc_flags |= GFP_DMA;
1493 rwlock_init(&kvm->arch.sca_lock);
1494 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1497 spin_lock(&kvm_lock);
1499 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1501 kvm->arch.sca = (struct bsca_block *)
1502 ((char *) kvm->arch.sca + sca_offset);
1503 spin_unlock(&kvm_lock);
1505 sprintf(debug_name, "kvm-%u", current->pid);
1507 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1511 kvm->arch.sie_page2 =
1512 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1513 if (!kvm->arch.sie_page2)
1516 /* Populate the facility mask initially. */
1517 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1518 sizeof(S390_lowcore.stfle_fac_list));
1519 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1520 if (i < kvm_s390_fac_list_mask_size())
1521 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1523 kvm->arch.model.fac_mask[i] = 0UL;
1526 /* Populate the facility list initially. */
1527 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1528 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1529 S390_ARCH_FAC_LIST_SIZE_BYTE);
/* facility 74: always advertised in both mask and list */
1531 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1532 set_kvm_facility(kvm->arch.model.fac_list, 74);
1534 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1535 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1537 kvm_s390_crypto_init(kvm);
1539 mutex_init(&kvm->arch.float_int.ais_lock);
1540 kvm->arch.float_int.simm = 0;
1541 kvm->arch.float_int.nimm = 0;
1542 kvm->arch.float_int.ais_enabled = 0;
1543 spin_lock_init(&kvm->arch.float_int.lock);
1544 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1545 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1546 init_waitqueue_head(&kvm->arch.ipte_wq);
1547 mutex_init(&kvm->arch.ipte_mutex);
1549 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1550 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1552 if (type & KVM_VM_S390_UCONTROL) {
/* ucontrol VMs manage their own gmaps per vcpu; no VM-wide limit */
1553 kvm->arch.gmap = NULL;
1554 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1556 if (sclp.hamax == U64_MAX)
1557 kvm->arch.mem_limit = TASK_SIZE_MAX;
1559 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1561 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1562 if (!kvm->arch.gmap)
1564 kvm->arch.gmap->private = kvm;
1565 kvm->arch.gmap->pfault_enabled = 0;
1568 kvm->arch.css_support = 0;
1569 kvm->arch.use_irqchip = 0;
1570 kvm->arch.epoch = 0;
1572 spin_lock_init(&kvm->arch.start_stop_lock);
1573 kvm_s390_vsie_init(kvm);
1574 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
/* error unwind: free what was allocated above (labels elided here) */
1578 free_page((unsigned long)kvm->arch.sie_page2);
1579 debug_unregister(kvm->arch.dbf);
1581 KVM_EVENT(3, "creation of vm failed: %d", rc);
/*
 * Per-vcpu debugfs is not used on s390; these are stubs required by
 * common KVM code (bodies elided in this extract).
 */
1585 bool kvm_arch_has_vcpu_debugfs(void)
1590 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
/*
 * Tear down one vcpu: clear pending local interrupts and async page
 * faults, drop it from the SCA (non-ucontrol) or remove its private
 * gmap (ucontrol), release CMMA state and the SIE control block.
 */
1595 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1597 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1598 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1599 kvm_s390_clear_local_irqs(vcpu);
1600 kvm_clear_async_pf_completion_queue(vcpu);
1601 if (!kvm_is_ucontrol(vcpu->kvm))
1604 if (kvm_is_ucontrol(vcpu->kvm))
1605 gmap_remove(vcpu->arch.gmap);
1607 if (vcpu->kvm->arch.use_cmma)
1608 kvm_s390_vcpu_unsetup_cmma(vcpu);
1609 free_page((unsigned long)(vcpu->arch.sie_block));
1611 kvm_vcpu_uninit(vcpu);
1612 kmem_cache_free(kvm_vcpu_cache, vcpu);
/*
 * Destroy all vcpus of a VM, then NULL out the vcpu array and reset the
 * online count under kvm->lock.
 */
1615 static void kvm_free_vcpus(struct kvm *kvm)
1618 struct kvm_vcpu *vcpu;
1620 kvm_for_each_vcpu(i, vcpu, kvm)
1621 kvm_arch_vcpu_destroy(vcpu);
1623 mutex_lock(&kvm->lock);
1624 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1625 kvm->vcpus[i] = NULL;
1627 atomic_set(&kvm->online_vcpus, 0);
1628 mutex_unlock(&kvm->lock);
/*
 * Architecture part of VM destruction: free vcpus, debug area,
 * sie_page2, the VM gmap (non-ucontrol only), adapters, floating
 * interrupts and vsie state.
 */
1631 void kvm_arch_destroy_vm(struct kvm *kvm)
1633 kvm_free_vcpus(kvm);
1635 debug_unregister(kvm->arch.dbf);
1636 free_page((unsigned long)kvm->arch.sie_page2);
1637 if (!kvm_is_ucontrol(kvm))
1638 gmap_remove(kvm->arch.gmap);
1639 kvm_s390_destroy_adapters(kvm);
1640 kvm_s390_clear_float_irqs(kvm);
1641 kvm_s390_vsie_destroy(kvm);
1642 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1645 /* Section: vcpu related */
/*
 * Create a private, unlimited gmap for a user-controlled vcpu and link
 * it back to the owning VM.
 */
1646 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1648 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1649 if (!vcpu->arch.gmap)
1651 vcpu->arch.gmap->private = vcpu->kvm;
/*
 * Remove a vcpu from the system control area: clear its MCN bit and SDA
 * entry in either the extended or the basic SCA, under the sca read
 * lock (entry layout differs between formats).
 */
1656 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1658 if (!kvm_s390_use_sca_entries())
1660 read_lock(&vcpu->kvm->arch.sca_lock);
1661 if (vcpu->kvm->arch.use_esca) {
1662 struct esca_block *sca = vcpu->kvm->arch.sca;
1664 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1665 sca->cpu[vcpu->vcpu_id].sda = 0;
1667 struct bsca_block *sca = vcpu->kvm->arch.sca;
1669 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1670 sca->cpu[vcpu->vcpu_id].sda = 0;
1672 read_unlock(&vcpu->kvm->arch.sca_lock);
/*
 * Wire a vcpu into the SCA: set the SIE block's scaoh/scaol origin
 * fields, record the SIE block address in the cpu slot and set the MCN
 * bit.  Without per-cpu SCA entries only the origin is set (the basic
 * SCA is still needed for ipte control).
 */
1675 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1677 if (!kvm_s390_use_sca_entries()) {
1678 struct bsca_block *sca = vcpu->kvm->arch.sca;
1680 /* we still need the basic sca for the ipte control */
1681 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1682 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1684 read_lock(&vcpu->kvm->arch.sca_lock);
1685 if (vcpu->kvm->arch.use_esca) {
1686 struct esca_block *sca = vcpu->kvm->arch.sca;
1688 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1689 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1690 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1691 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
1692 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1694 struct bsca_block *sca = vcpu->kvm->arch.sca;
1696 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1697 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1698 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1699 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1701 read_unlock(&vcpu->kvm->arch.sca_lock);
1704 /* Basic SCA to Extended SCA data copy routines */
/* Copy one SCA entry's SIGP control fields from basic to extended form. */
1705 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1708 d->sigp_ctrl.c = s->sigp_ctrl.c;
1709 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
/* Copy ipte control plus every basic-SCA cpu slot into the extended SCA. */
1712 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1716 d->ipte_control = s->ipte_control;
1718 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1719 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
/*
 * Migrate a VM from the basic to the extended SCA: allocate a zeroed
 * ESCA, block all vcpus and take the sca write lock while copying and
 * repointing every SIE block (scaoh/scaol + ECB2_ESCA), then free the
 * old basic SCA.
 */
1722 static int sca_switch_to_extended(struct kvm *kvm)
1724 struct bsca_block *old_sca = kvm->arch.sca;
1725 struct esca_block *new_sca;
1726 struct kvm_vcpu *vcpu;
1727 unsigned int vcpu_idx;
1730 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1734 scaoh = (u32)((u64)(new_sca) >> 32);
1735 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1737 kvm_s390_vcpu_block_all(kvm);
1738 write_lock(&kvm->arch.sca_lock);
1740 sca_copy_b_to_e(new_sca, old_sca);
1742 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1743 vcpu->arch.sie_block->scaoh = scaoh;
1744 vcpu->arch.sie_block->scaol = scaol;
1745 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
1747 kvm->arch.sca = new_sca;
1748 kvm->arch.use_esca = 1;
1750 write_unlock(&kvm->arch.sca_lock);
1751 kvm_s390_vcpu_unblock_all(kvm);
1753 free_page((unsigned long)old_sca);
1755 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1756 old_sca, kvm->arch.sca);
/*
 * Check whether a vcpu with the given id can be added; switches the VM
 * to the extended SCA on demand when the id exceeds the basic SCA slot
 * count and the hardware (ESCA + 64-bit SCAO) supports it.
 */
1760 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1764 if (!kvm_s390_use_sca_entries()) {
1765 if (id < KVM_MAX_VCPUS)
1769 if (id < KVM_S390_BSCA_CPU_SLOTS)
1771 if (!sclp.has_esca || !sclp.has_64bscao)
1774 mutex_lock(&kvm->lock);
1775 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1776 mutex_unlock(&kvm->lock);
1778 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1781 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1783 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1784 kvm_clear_async_pf_completion_queue(vcpu);
1785 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1791 kvm_s390_set_prefix(vcpu, 0);
1792 if (test_kvm_facility(vcpu->kvm, 64))
1793 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1794 if (test_kvm_facility(vcpu->kvm, 133))
1795 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
1796 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1797 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1800 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1802 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1804 if (kvm_is_ucontrol(vcpu->kvm))
1805 return __kvm_ucontrol_vcpu_init(vcpu);
1810 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/* Record the TOD timestamp at which guest CPU-timer accounting begins. */
1811 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1813 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1814 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1815 vcpu->arch.cputm_start = get_tod_clock_fast();
1816 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1819 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/* Charge the elapsed TOD delta to the guest cpu timer and stop accounting. */
1820 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1822 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1823 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1824 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1825 vcpu->arch.cputm_start = 0;
1826 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1829 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/* Mark accounting enabled and start the timer window. */
1830 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1832 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1833 vcpu->arch.cputm_enabled = true;
1834 __start_cpu_timer_accounting(vcpu);
1837 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/* Stop the current timer window and mark accounting disabled. */
1838 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1840 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1841 __stop_cpu_timer_accounting(vcpu);
1842 vcpu->arch.cputm_enabled = false;
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
1845 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1847 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1848 __enable_cpu_timer_accounting(vcpu);
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
1852 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1854 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1855 __disable_cpu_timer_accounting(vcpu);
1859 /* set the cpu timer - may only be called from the VCPU thread itself */
/*
 * Set the guest CPU timer under the cputm seqcount; if accounting is
 * active, restart the measurement window from now.
 */
1860 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1862 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1863 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1864 if (vcpu->arch.cputm_enabled)
1865 vcpu->arch.cputm_start = get_tod_clock_fast();
1866 vcpu->arch.sie_block->cputm = cputm;
1867 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1871 /* update and get the cpu timer - can also be called from other VCPU threads */
/*
 * Read a consistent guest CPU-timer value: fast path when accounting is
 * off, otherwise a seqcount retry loop subtracting the currently
 * running window (cputm_start == 0 means start/stop is in progress).
 */
1872 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1877 if (unlikely(!vcpu->arch.cputm_enabled))
1878 return vcpu->arch.sie_block->cputm;
1880 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1882 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1884 * If the writer would ever execute a read in the critical
1885 * section, e.g. in irq context, we have a deadlock.
1887 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1888 value = vcpu->arch.sie_block->cputm;
1889 /* if cputm_start is 0, accounting is being started/stopped */
1890 if (likely(vcpu->arch.cputm_start))
1891 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1892 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
/*
 * Scheduled in: enable the vcpu's gmap, flag it RUNNING and, if timer
 * accounting is enabled and the vcpu is not idle, restart the window.
 */
1897 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1900 gmap_enable(vcpu->arch.enabled_gmap);
1901 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1902 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1903 __start_cpu_timer_accounting(vcpu);
/*
 * Scheduled out: mirror of vcpu_load — stop timer accounting, clear
 * RUNNING and remember/disable the currently enabled gmap.
 */
1907 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1910 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1911 __stop_cpu_timer_accounting(vcpu);
1912 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1913 vcpu->arch.enabled_gmap = gmap_get_enabled();
1914 gmap_disable(vcpu->arch.enabled_gmap);
/*
 * Perform the architected initial CPU reset (without dropping to ESA
 * mode): zero PSW, prefix, timers and registers, restore the
 * architected control-register defaults (cr0=0xE0, cr14=0xC2000000),
 * invalidate pfault state and stop the vcpu unless user space controls
 * the cpu state itself.
 */
1918 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1920 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1921 vcpu->arch.sie_block->gpsw.mask = 0UL;
1922 vcpu->arch.sie_block->gpsw.addr = 0UL;
1923 kvm_s390_set_prefix(vcpu, 0);
1924 kvm_s390_set_cpu_timer(vcpu, 0);
1925 vcpu->arch.sie_block->ckc = 0UL;
1926 vcpu->arch.sie_block->todpr = 0;
1927 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1928 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1929 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1930 /* make sure the new fpc will be lazily loaded */
1932 current->thread.fpu.fpc = 0;
1933 vcpu->arch.sie_block->gbea = 1;
1934 vcpu->arch.sie_block->pp = 0;
1935 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1936 kvm_clear_async_pf_completion_queue(vcpu);
1937 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1938 kvm_s390_vcpu_stop(vcpu);
1939 kvm_s390_clear_local_irqs(vcpu);
/*
 * Post-creation setup: inherit the VM epoch under kvm->lock, share the
 * VM gmap for non-ucontrol vcpus, intercept operation exceptions when
 * facility 74 or user_instr0 requires it, and seed enabled_gmap so the
 * first vcpu_load enables the right address space.
 */
1942 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1944 mutex_lock(&vcpu->kvm->lock);
1946 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1948 mutex_unlock(&vcpu->kvm->lock);
1949 if (!kvm_is_ucontrol(vcpu->kvm)) {
1950 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1953 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1954 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1955 /* make vcpu_load load the right gmap on the first trigger */
1956 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
/*
 * Mirror the VM crypto configuration into this vcpu's SIE block:
 * AES/DEA key-wrapping ECB3 bits and the CRYCB descriptor (requires
 * facility 76).
 */
1959 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1961 if (!test_kvm_facility(vcpu->kvm, 76))
1964 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1966 if (vcpu->kvm->arch.crypto.aes_kw)
1967 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1968 if (vcpu->kvm->arch.crypto.dea_kw)
1969 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1971 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
/* Free the CMMA collaborative-memory bitmap page (cbrlo). */
1974 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1976 free_page(vcpu->arch.sie_block->cbrlo);
1977 vcpu->arch.sie_block->cbrlo = 0;
/*
 * Allocate the cbrlo page and enable CMMA in the SIE block; PFMFI is
 * cleared because it is incompatible with CMMA interpretation here.
 */
1980 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1982 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1983 if (!vcpu->arch.sie_block->cbrlo)
1986 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
1987 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
/* Apply the VM cpu model (IBC value, facility list) to the SIE block. */
1991 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1993 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1995 vcpu->arch.sie_block->ibc = model->ibc;
1996 if (test_kvm_facility(vcpu->kvm, 7))
1997 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
/*
 * Final vcpu setup: program cpuflags, ECB/ECA feature bits according to
 * available facilities and SCLP capabilities, vector/guarded-storage
 * register management, CMMA, the clock-comparator wakeup timer and the
 * per-vcpu crypto settings.
 * NOTE(review): several facility checks and the closing return are
 * elided in this extract; code left byte-identical.
 */
2000 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2004 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2008 if (test_kvm_facility(vcpu->kvm, 78))
2009 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2010 else if (test_kvm_facility(vcpu->kvm, 8))
2011 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2013 kvm_s390_vcpu_setup_model(vcpu);
2015 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2016 if (MACHINE_HAS_ESOP)
2017 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2018 if (test_kvm_facility(vcpu->kvm, 9))
2019 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2020 if (test_kvm_facility(vcpu->kvm, 73))
2021 vcpu->arch.sie_block->ecb |= ECB_TE;
2023 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2024 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2025 if (test_kvm_facility(vcpu->kvm, 130))
2026 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2027 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2029 vcpu->arch.sie_block->eca |= ECA_CEI;
2031 vcpu->arch.sie_block->eca |= ECA_IB;
2033 vcpu->arch.sie_block->eca |= ECA_SII;
2034 if (sclp.has_sigpif)
2035 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2036 if (test_kvm_facility(vcpu->kvm, 129)) {
2037 vcpu->arch.sie_block->eca |= ECA_VX;
2038 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2040 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2042 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2045 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2047 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2049 if (vcpu->kvm->arch.use_cmma) {
2050 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2054 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2055 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2057 kvm_s390_vcpu_crypto_setup(vcpu);
/*
 * Allocate and initialize a new vcpu: verify the id fits the SCA,
 * allocate the vcpu structure and its SIE page (sie block + itdb),
 * initialize local-interrupt state and the cputm seqcount, then run
 * common kvm_vcpu_init.  Error paths free the SIE page / vcpu slab
 * object (labels elided in this extract).
 */
2062 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2065 struct kvm_vcpu *vcpu;
2066 struct sie_page *sie_page;
2069 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2074 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2078 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2082 vcpu->arch.sie_block = &sie_page->sie_block;
2083 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2085 /* the real guest size will always be smaller than msl */
2086 vcpu->arch.sie_block->mso = 0;
2087 vcpu->arch.sie_block->msl = sclp.hamax;
2089 vcpu->arch.sie_block->icpua = id;
2090 spin_lock_init(&vcpu->arch.local_int.lock);
2091 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2092 vcpu->arch.local_int.wq = &vcpu->wq;
2093 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2094 seqcount_init(&vcpu->arch.cputm_seqcount);
2096 rc = kvm_vcpu_init(vcpu, kvm, id);
2098 goto out_free_sie_block;
2099 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2100 vcpu->arch.sie_block);
2101 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2105 free_page((unsigned long)(vcpu->arch.sie_block));
2107 kmem_cache_free(kvm_vcpu_cache, vcpu);
/* A vcpu is runnable when it has a deliverable interrupt pending. */
2112 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2114 return kvm_s390_vcpu_has_irq(vcpu, 0);
/* Prevent SIE entry by setting the PROG_BLOCK_SIE prog20 bit. */
2117 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2119 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/* Allow SIE entry again by clearing PROG_BLOCK_SIE. */
2123 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2125 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/* Flag a pending request so the vcpu leaves SIE to process it. */
2128 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2130 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
/* Clear the request flag once the vcpu has handled it. */
2134 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2136 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2140 * Kick a guest cpu out of SIE and wait until SIE is not running.
2141 * If the CPU is not running (e.g. waiting as idle) the function will
2142 * return immediately. */
2143 void exit_sie(struct kvm_vcpu *vcpu)
2145 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2146 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2150 /* Kick a guest cpu out of SIE to process a request synchronously */
2151 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2153 kvm_make_request(req, vcpu);
2154 kvm_s390_vcpu_request(vcpu);
/*
 * gmap invalidation notifier: when a range that may cover a vcpu's
 * two-page prefix area is unmapped, request an MMU reload for that
 * vcpu.  Shadow gmaps and ranges above 2 GiB (where no prefix can be)
 * are ignored.
 */
2157 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2160 struct kvm *kvm = gmap->private;
2161 struct kvm_vcpu *vcpu;
2162 unsigned long prefix;
2165 if (gmap_is_shadow(gmap))
2167 if (start >= 1UL << 31)
2168 /* We are only interested in prefix pages */
2170 kvm_for_each_vcpu(i, vcpu, kvm) {
2171 /* match against both prefix pages */
2172 prefix = kvm_s390_get_prefix(vcpu);
2173 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2174 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2176 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
/* Stub required by common KVM code; s390 kicks vcpus via exit_sie(). */
2181 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2183 /* kvm common code refers to this, but never calls it */
/*
 * KVM_GET_ONE_REG: copy a single s390 register (TOD programmable reg,
 * epoch difference, cpu timer, clock comparator, pfault triple, program
 * parameter, breaking-event address) out to user space.
 */
2188 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2189 struct kvm_one_reg *reg)
2194 case KVM_REG_S390_TODPR:
2195 r = put_user(vcpu->arch.sie_block->todpr,
2196 (u32 __user *)reg->addr);
2198 case KVM_REG_S390_EPOCHDIFF:
2199 r = put_user(vcpu->arch.sie_block->epoch,
2200 (u64 __user *)reg->addr);
2202 case KVM_REG_S390_CPU_TIMER:
2203 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2204 (u64 __user *)reg->addr);
2206 case KVM_REG_S390_CLOCK_COMP:
2207 r = put_user(vcpu->arch.sie_block->ckc,
2208 (u64 __user *)reg->addr);
2210 case KVM_REG_S390_PFTOKEN:
2211 r = put_user(vcpu->arch.pfault_token,
2212 (u64 __user *)reg->addr);
2214 case KVM_REG_S390_PFCOMPARE:
2215 r = put_user(vcpu->arch.pfault_compare,
2216 (u64 __user *)reg->addr);
2218 case KVM_REG_S390_PFSELECT:
2219 r = put_user(vcpu->arch.pfault_select,
2220 (u64 __user *)reg->addr);
2222 case KVM_REG_S390_PP:
2223 r = put_user(vcpu->arch.sie_block->pp,
2224 (u64 __user *)reg->addr);
2226 case KVM_REG_S390_GBEA:
2227 r = put_user(vcpu->arch.sie_block->gbea,
2228 (u64 __user *)reg->addr);
/*
 * KVM_SET_ONE_REG: counterpart of get_one_reg.  The cpu timer goes
 * through kvm_s390_set_cpu_timer() to keep accounting consistent, and
 * writing an invalid pfault token also flushes the async-pf queue.
 */
2237 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2238 struct kvm_one_reg *reg)
2244 case KVM_REG_S390_TODPR:
2245 r = get_user(vcpu->arch.sie_block->todpr,
2246 (u32 __user *)reg->addr);
2248 case KVM_REG_S390_EPOCHDIFF:
2249 r = get_user(vcpu->arch.sie_block->epoch,
2250 (u64 __user *)reg->addr);
2252 case KVM_REG_S390_CPU_TIMER:
2253 r = get_user(val, (u64 __user *)reg->addr);
2255 kvm_s390_set_cpu_timer(vcpu, val);
2257 case KVM_REG_S390_CLOCK_COMP:
2258 r = get_user(vcpu->arch.sie_block->ckc,
2259 (u64 __user *)reg->addr);
2261 case KVM_REG_S390_PFTOKEN:
2262 r = get_user(vcpu->arch.pfault_token,
2263 (u64 __user *)reg->addr);
2264 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2265 kvm_clear_async_pf_completion_queue(vcpu);
2267 case KVM_REG_S390_PFCOMPARE:
2268 r = get_user(vcpu->arch.pfault_compare,
2269 (u64 __user *)reg->addr);
2271 case KVM_REG_S390_PFSELECT:
2272 r = get_user(vcpu->arch.pfault_select,
2273 (u64 __user *)reg->addr);
2275 case KVM_REG_S390_PP:
2276 r = get_user(vcpu->arch.sie_block->pp,
2277 (u64 __user *)reg->addr);
2279 case KVM_REG_S390_GBEA:
2280 r = get_user(vcpu->arch.sie_block->gbea,
2281 (u64 __user *)reg->addr);
/* KVM_S390_INITIAL_RESET ioctl: delegate to the initial-reset helper. */
2290 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2292 kvm_s390_vcpu_initial_reset(vcpu);
2296 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2298 memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs));
2302 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2304 memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
/* KVM_SET_SREGS: install access and control registers. */
2308 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2309 struct kvm_sregs *sregs)
2311 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2312 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/* KVM_GET_SREGS: read back access and control registers. */
2316 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2317 struct kvm_sregs *sregs)
2319 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2320 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/*
 * KVM_SET_FPU: validate the FP control word, then store the FP
 * registers either into the vector-register format (when the host has
 * VX) or the plain fprs array.
 */
2324 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2326 if (test_fp_ctl(fpu->fpc))
2328 vcpu->run->s.regs.fpc = fpu->fpc;
2330 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2331 (freg_t *) fpu->fprs);
2333 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
/* KVM_GET_FPU: mirror of set_fpu, converting from vrs when needed. */
2337 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2339 /* make sure we have the latest values */
2342 convert_vx_to_fp((freg_t *) fpu->fprs,
2343 (__vector128 *) vcpu->run->s.regs.vrs);
2345 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2346 fpu->fpc = vcpu->run->s.regs.fpc;
/* Set the initial PSW; only permitted while the vcpu is stopped. */
2350 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2354 if (!is_vcpu_stopped(vcpu))
2357 vcpu->run->psw_mask = psw.mask;
2358 vcpu->run->psw_addr = psw.addr;
/* Address translation ioctl is not implemented on s390. */
2363 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2364 struct kvm_translation *tr)
2366 return -EINVAL; /* not implemented yet */
2369 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2370 KVM_GUESTDBG_USE_HW_BP | \
2371 KVM_GUESTDBG_ENABLE)
/*
 * KVM_SET_GUEST_DEBUG: validate the control flags (guest PER support
 * required), enable/disable guest PER via CPUSTAT_P and import hardware
 * breakpoints.  On failure of the import, all debug state is rolled
 * back.
 */
2373 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2374 struct kvm_guest_debug *dbg)
2378 vcpu->guest_debug = 0;
2379 kvm_s390_clear_bp_data(vcpu);
2381 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2383 if (!sclp.has_gpere)
2386 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2387 vcpu->guest_debug = dbg->control;
2388 /* enforce guest PER */
2389 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2391 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2392 rc = kvm_s390_import_bp_data(vcpu, dbg);
2394 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2395 vcpu->arch.guestdbg.last_bp = 0;
/* error path: undo partially-applied debug state */
2399 vcpu->guest_debug = 0;
2400 kvm_s390_clear_bp_data(vcpu);
2401 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
/* Report STOPPED vs. OPERATING; CHECK_STOP and LOAD are not modeled. */
2407 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2408 struct kvm_mp_state *mp_state)
2410 /* CHECK_STOP and LOAD are not supported yet */
2411 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2412 KVM_MP_STATE_OPERATING;
/*
 * Set the vcpu run state; using this ioctl hands cpu-state control to
 * user space permanently (user_cpu_state_ctrl = 1).
 */
2415 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2416 struct kvm_mp_state *mp_state)
2420 /* user space knows about this interface - let it control the state */
2421 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2423 switch (mp_state->mp_state) {
2424 case KVM_MP_STATE_STOPPED:
2425 kvm_s390_vcpu_stop(vcpu);
2427 case KVM_MP_STATE_OPERATING:
2428 kvm_s390_vcpu_start(vcpu);
2430 case KVM_MP_STATE_LOAD:
2431 case KVM_MP_STATE_CHECK_STOP:
2432 /* fall through - CHECK_STOP and LOAD are not supported yet */
/* Test whether interruption-by-stop (IBS) is currently set for a vcpu. */
2440 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2442 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
/*
 * Process pending vcpu requests before (re-)entering SIE: re-protect
 * the prefix pages (MMU_RELOAD, retried on failure), flush the TLB,
 * toggle IBS, and arm operation-exception intercepts.  UNHALT is a
 * no-op on s390 and only cleared.
 */
2445 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2448 kvm_s390_vcpu_request_handled(vcpu);
2449 if (!vcpu->requests)
2452 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2453 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2454 * This ensures that the ipte instruction for this request has
2455 * already finished. We might race against a second unmapper that
2456 * wants to set the blocking bit. Lets just retry the request loop.
2458 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2460 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2461 kvm_s390_get_prefix(vcpu),
2462 PAGE_SIZE * 2, PROT_WRITE);
2464 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2470 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2471 vcpu->arch.sie_block->ihcpu = 0xffff;
2475 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2476 if (!ibs_enabled(vcpu)) {
2477 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2478 atomic_or(CPUSTAT_IBS,
2479 &vcpu->arch.sie_block->cpuflags);
2484 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2485 if (ibs_enabled(vcpu)) {
2486 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2487 atomic_andnot(CPUSTAT_IBS,
2488 &vcpu->arch.sie_block->cpuflags);
2493 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2494 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2498 /* nothing to do, just clear the request */
2499 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
/*
 * Set the guest TOD clock: store the new epoch (offset from the host
 * TOD) and propagate it to every vcpu's SIE block while all vcpus are
 * blocked, all under kvm->lock.
 */
2504 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2506 struct kvm_vcpu *vcpu;
2509 mutex_lock(&kvm->lock);
2511 kvm->arch.epoch = tod - get_tod_clock();
2512 kvm_s390_vcpu_block_all(kvm);
2513 kvm_for_each_vcpu(i, vcpu, kvm)
2514 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2515 kvm_s390_vcpu_unblock_all(kvm);
2517 mutex_unlock(&kvm->lock);
2521 * kvm_arch_fault_in_page - fault-in guest page if necessary
2522 * @vcpu: The corresponding virtual cpu
2523 * @gpa: Guest physical address
2524 * @writable: Whether the page should be writable or not
2526 * Make sure that a guest page has been faulted-in on the host.
2528 * Return: Zero on success, negative error code otherwise.
/* Thin wrapper around gmap_fault(), translating the writable flag. */
2530 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2532 return gmap_fault(vcpu->arch.gmap, gpa,
2533 writable ? FAULT_FLAG_WRITE : 0);
/*
 * Inject a pseudo-page-fault notification into the guest: an INIT
 * interrupt on the vcpu when the fault starts, or a DONE floating
 * interrupt on the VM when it completes; the token identifies the
 * fault to the guest.
 */
2536 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2537 unsigned long token)
2539 struct kvm_s390_interrupt inti;
2540 struct kvm_s390_irq irq;
2543 irq.u.ext.ext_params2 = token;
2544 irq.type = KVM_S390_INT_PFAULT_INIT;
2545 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2547 inti.type = KVM_S390_INT_PFAULT_DONE;
2548 inti.parm64 = token;
2549 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
/* async-pf hook: page not yet present — inject the INIT token. */
2553 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2554 struct kvm_async_pf *work)
2556 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2557 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
/* async-pf hook: page became present — inject the DONE token. */
2560 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2561 struct kvm_async_pf *work)
2563 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2564 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2567 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2568 struct kvm_async_pf *work)
2570 /* s390 will always inject the page directly */
2573 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2576 * s390 will always inject the page directly,
2577 * but we still want check_async_completion to cleanup
/*
 * Decide whether the current host fault can be handled asynchronously
 * as a guest pseudo page fault: requires a valid token, matching
 * PSW-mask selection, enabled external interrupts, no pending irq,
 * cr0 bit 0x200 set and pfault enabled on the gmap.  Reads the guest's
 * token from guest real memory before arming the async pf.
 */
2582 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2585 struct kvm_arch_async_pf arch;
2588 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2590 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2591 vcpu->arch.pfault_compare)
2593 if (psw_extint_disabled(vcpu))
2595 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2597 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2599 if (!vcpu->arch.gmap->pfault_enabled)
2602 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2603 hva += current->thread.gmap_addr & ~PAGE_MASK;
2604 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2607 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
/*
 * Work done before each SIE entry: async-pf housekeeping, mirroring
 * gprs 14/15 into the SIE block, machine-check pending bailout,
 * interrupt delivery (non-ucontrol), request processing and guest-debug
 * PER register patching.
 */
2611 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2616 * On s390 notifications for arriving pages will be delivered directly
2617 * to the guest but the house keeping for completed pfaults is
2618 * handled outside the worker.
2620 kvm_check_async_pf_completion(vcpu);
2622 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2623 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2628 if (test_cpu_flag(CIF_MCCK_PENDING))
2631 if (!kvm_is_ucontrol(vcpu->kvm)) {
2632 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2637 rc = kvm_s390_handle_requests(vcpu);
2641 if (guestdbg_enabled(vcpu)) {
2642 kvm_s390_backup_guest_per_regs(vcpu);
2643 kvm_s390_patch_guest_per_regs(vcpu);
2646 vcpu->arch.sie_block->icptcode = 0;
2647 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2648 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2649 trace_kvm_s390_sie_enter(vcpu, cpuflags);
/*
 * Handle a fault that occurred while executing the SIE instruction
 * itself: inject an addressing exception into the guest.  Because DAT
 * exceptions are nullifying, the PSW still points at the faulting
 * instruction, so the opcode is fetched to determine the instruction
 * length and forward the PSW past it before injection.
 */
2654 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2656 struct kvm_s390_pgm_info pgm_info = {
2657 .code = PGM_ADDRESSING,
2662 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2663 trace_kvm_s390_sie_fault(vcpu);
2666 * We want to inject an addressing exception, which is defined as a
2667 * suppressing or terminating exception. However, since we came here
2668 * by a DAT access exception, the PSW still points to the faulting
2669 * instruction since DAT exceptions are nullifying. So we've got
2670 * to look up the current opcode to get the length of the instruction
2671 * to be able to forward the PSW.
2673 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
2674 ilen = insn_length(opcode);
2678 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2679 * Forward by arbitrary ilc, injection will take care of
2680 * nullification if necessary.
/* On fetch failure, reuse the pgm info stored by the access code. */
2682 pgm_info = vcpu->arch.pgm;
2685 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2686 kvm_s390_forward_psw(vcpu, ilen);
2687 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
/*
 * Dispatch on the outcome of a SIE run: a real intercept, a spurious
 * exit, a ucontrol translation fault to be resolved by userspace, a
 * gmap pfault (handled sync or async), or a fault in SIE itself.
 */
2690 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2692 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2693 vcpu->arch.sie_block->icptcode);
2694 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2696 if (guestdbg_enabled(vcpu))
2697 kvm_s390_restore_guest_per_regs(vcpu);
/* Copy the SIE shadow copies of gprs 14/15 back to the run struct. */
2699 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2700 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2702 if (vcpu->arch.sie_block->icptcode > 0) {
2703 int rc = kvm_handle_sie_intercept(vcpu);
/* -EOPNOTSUPP means "forward this intercept to userspace". */
2705 if (rc != -EOPNOTSUPP)
2707 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2708 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2709 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2710 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2712 } else if (exit_reason != -EFAULT) {
2713 vcpu->stat.exit_null++;
2715 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2716 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2717 vcpu->run->s390_ucontrol.trans_exc_code =
2718 current->thread.gmap_addr;
/* 0x10 is a segment-translation program code reported to userspace. */
2719 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2721 } else if (current->thread.gmap_pfault) {
2722 trace_kvm_s390_major_guest_pfault(vcpu);
2723 current->thread.gmap_pfault = 0;
/* Fall back to a synchronous fault-in when async pf can't be armed. */
2724 if (kvm_arch_setup_async_pf(vcpu))
2726 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2728 return vcpu_post_run_fault_in_sie(vcpu);
/*
 * Main vcpu execution loop: pre-run setup, drop srcu and enter SIE
 * with irqs off and guest context accounting active, re-acquire srcu,
 * then post-run processing.  Loops until a signal, a guestdbg exit, or
 * a non-zero rc ends the run.
 */
2731 static int __vcpu_run(struct kvm_vcpu *vcpu)
2733 int rc, exit_reason;
2736 * We try to hold kvm->srcu during most of vcpu_run (except when run-
2737 * ning the guest), so that memslots (and other stuff) are protected
2739 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2742 rc = vcpu_pre_run(vcpu);
/* srcu must not be held across the (potentially long) SIE run. */
2746 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2748 * As PF_VCPU will be used in fault handler, between
2749 * guest_enter and guest_exit should be no uaccess.
2751 local_irq_disable();
2752 guest_enter_irqoff();
/* Guest time is accounted via the CPU timer while inside SIE. */
2753 __disable_cpu_timer_accounting(vcpu);
2755 exit_reason = sie64a(vcpu->arch.sie_block,
2756 vcpu->run->s.regs.gprs);
2757 local_irq_disable();
2758 __enable_cpu_timer_accounting(vcpu);
2759 guest_exit_irqoff();
2761 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2763 rc = vcpu_post_run(vcpu, exit_reason);
2764 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2766 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/*
 * Copy register state from the kvm_run area (userspace) into the vcpu
 * before entering the guest.  Only the register groups flagged in
 * kvm_dirty_regs are synced; floating point, access and guarded-storage
 * state is swapped lazily between host and guest here.
 */
2770 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2772 struct runtime_instr_cb *riccb;
2775 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
2776 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
/* The PSW is always synced, independent of kvm_dirty_regs. */
2777 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2778 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2779 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2780 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2781 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2782 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2783 /* some control register changes require a tlb flush */
2784 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2786 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2787 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2788 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2789 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2790 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2791 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2793 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2794 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2795 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2796 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
/* An invalid token disables pfault; drop queued completions. */
2797 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2798 kvm_clear_async_pf_completion_queue(vcpu);
2801 * If userspace sets the riccb (e.g. after migration) to a valid state,
2802 * we should enable RI here instead of doing the lazy enablement.
2804 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
2805 test_kvm_facility(vcpu->kvm, 64) &&
2807 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
2808 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
2809 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
2812 * If userspace sets the gscb (e.g. after migration) to non-zero,
2813 * we should enable GS here instead of doing the lazy enablement.
2815 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
2816 test_kvm_facility(vcpu->kvm, 133) &&
2818 !vcpu->arch.gs_enabled) {
2819 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
2820 vcpu->arch.sie_block->ecb |= ECB_GS;
2821 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2822 vcpu->arch.gs_enabled = 1;
/* Swap access registers: save host (userspace) copy, load guest's. */
2824 save_access_regs(vcpu->arch.host_acrs);
2825 restore_access_regs(vcpu->run->s.regs.acrs);
2826 /* save host (userspace) fprs/vrs */
2828 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
2829 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
/* Point the lazy FPU at the guest's vector or FP register file;
 * the surrounding MACHINE_HAS_VX conditional is elided in this view. */
2831 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
2833 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
2834 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
2835 if (test_fp_ctl(current->thread.fpu.fpc))
2836 /* User space provided an invalid FPC, let's clear it */
2837 current->thread.fpu.fpc = 0;
2838 if (MACHINE_HAS_GS) {
/* Control register bit 2.4 enables guarded-storage controls. */
2840 __ctl_set_bit(2, 4);
2841 if (current->thread.gs_cb) {
2842 vcpu->arch.host_gscb = current->thread.gs_cb;
2843 save_gs_cb(vcpu->arch.host_gscb);
2845 if (vcpu->arch.gs_enabled) {
2846 current->thread.gs_cb = (struct gs_cb *)
2847 &vcpu->run->s.regs.gscb;
2848 restore_gs_cb(current->thread.gs_cb);
/* All dirty register groups have been consumed. */
2853 kvm_run->kvm_dirty_regs = 0;
/*
 * Mirror of sync_regs(): copy register state from the vcpu back into
 * the kvm_run area after leaving the guest, and restore the host
 * (userspace) access, FP and guarded-storage state.
 */
2856 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2858 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2859 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2860 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2861 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2862 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2863 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2864 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2865 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2866 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2867 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2868 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2869 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
/* Swap access registers back: save guest's, restore host copy. */
2870 save_access_regs(vcpu->run->s.regs.acrs);
2871 restore_access_regs(vcpu->arch.host_acrs);
2872 /* Save guest register state */
2874 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2875 /* Restore will be done lazily at return */
2876 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
2877 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
2878 if (MACHINE_HAS_GS) {
2879 __ctl_set_bit(2, 4);
2880 if (vcpu->arch.gs_enabled)
2881 save_gs_cb(current->thread.gs_cb);
2883 current->thread.gs_cb = vcpu->arch.host_gscb;
2884 restore_gs_cb(vcpu->arch.host_gscb);
/* Only keep the GS control bit if the host had a gscb of its own. */
2886 if (!vcpu->arch.host_gscb)
2887 __ctl_clear_bit(2, 4);
2888 vcpu->arch.host_gscb = NULL;
/*
 * KVM_RUN entry point for s390: handle immediate-exit and pending
 * debug exits, block signals per vcpu->sigset, auto-start the vcpu
 * unless userspace controls CPU state, run the SIE loop, then map the
 * loop's result onto a kvm_run exit reason and store registers back.
 */
2893 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2898 if (kvm_run->immediate_exit)
2901 if (guestdbg_exit_pending(vcpu)) {
2902 kvm_s390_prepare_debug_exit(vcpu);
2906 if (vcpu->sigset_active)
2907 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2909 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2910 kvm_s390_vcpu_start(vcpu);
2911 } else if (is_vcpu_stopped(vcpu)) {
2912 pr_err_ratelimited("can't run stopped vcpu %d\n",
2917 sync_regs(vcpu, kvm_run);
2918 enable_cpu_timer_accounting(vcpu);
2921 rc = __vcpu_run(vcpu);
/* A pending signal turns into KVM_EXIT_INTR for userspace. */
2923 if (signal_pending(current) && !rc) {
2924 kvm_run->exit_reason = KVM_EXIT_INTR;
2928 if (guestdbg_exit_pending(vcpu) && !rc) {
2929 kvm_s390_prepare_debug_exit(vcpu);
2933 if (rc == -EREMOTE) {
2934 /* userspace support is needed, kvm_run has been prepared */
2938 disable_cpu_timer_accounting(vcpu);
2939 store_regs(vcpu, kvm_run);
2941 if (vcpu->sigset_active)
2942 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2944 vcpu->stat.exit_userspace++;
2949 * store status at address
2950 * we have two special cases:
2951 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2952 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
/*
 * Write the architected store-status save area (FP/VX regs, GPRs, PSW,
 * prefix, FPC, TOD programmable reg, CPU timer, clock comparator,
 * access regs, control regs) to guest absolute address @gpa.
 * Returns -EFAULT if any guest write fails, 0 on success.
 */
2954 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2956 unsigned char archmode = 1;
2957 freg_t fprs[NUM_FPRS];
2962 px = kvm_s390_get_prefix(vcpu);
/* Byte 163 set to 1 marks the z/Architecture archmode flag. */
2963 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2964 if (write_guest_abs(vcpu, 163, &archmode, 1))
2967 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2968 if (write_guest_real(vcpu, 163, &archmode, 1))
/* Rebase gpa so the __LC_* offsets below address the save area. */
2972 gpa -= __LC_FPREGS_SAVE_AREA;
2974 /* manually convert vector registers if necessary */
2975 if (MACHINE_HAS_VX) {
2976 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2977 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2980 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2981 vcpu->run->s.regs.fprs, 128);
2983 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2984 vcpu->run->s.regs.gprs, 128);
2985 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2986 &vcpu->arch.sie_block->gpsw, 16);
2987 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2989 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2990 &vcpu->run->s.regs.fpc, 4);
2991 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2992 &vcpu->arch.sie_block->todpr, 4);
2993 cputm = kvm_s390_get_cpu_timer(vcpu);
2994 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
/* The stored clock comparator drops the low byte (>> 8). */
2996 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2997 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2999 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3000 &vcpu->run->s.regs.acrs, 64);
3001 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3002 &vcpu->arch.sie_block->gcr, 128);
3003 return rc ? -EFAULT : 0;
/*
 * Store-status wrapper for a loaded vcpu: refresh the run-area copies
 * of the lazily-switched FPC and access registers before delegating to
 * kvm_s390_store_status_unloaded().
 */
3006 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3009 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3010 * switch in the run ioctl. Let's update our copies before we save
3011 * it into the save area
3014 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3015 save_access_regs(vcpu->run->s.regs.acrs);
3017 return kvm_s390_store_status_unloaded(vcpu, addr);
/*
 * Drop any queued ENABLE_IBS request and synchronously request that
 * IBS (interlock bus signalling) be disabled on this vcpu.
 */
3020 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3022 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3023 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
/* Disable IBS on every online vcpu of the VM. */
3026 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3029 struct kvm_vcpu *vcpu;
3031 kvm_for_each_vcpu(i, vcpu, kvm) {
3032 __disable_ibs_on_vcpu(vcpu);
/*
 * Drop any queued DISABLE_IBS request and synchronously request that
 * IBS be enabled on this vcpu (used when it is the sole runner).
 */
3036 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3040 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3041 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
/*
 * Transition a vcpu out of the STOPPED state.  Serialized by the
 * per-VM start_stop_lock; manages the IBS optimization: enabled when
 * this becomes the only running vcpu, disabled everywhere when a
 * second vcpu starts.
 */
3044 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3046 int i, online_vcpus, started_vcpus = 0;
3048 if (!is_vcpu_stopped(vcpu))
3051 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3052 /* Only one cpu at a time may enter/leave the STOPPED state. */
3053 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3054 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
/* Count how many vcpus are already running. */
3056 for (i = 0; i < online_vcpus; i++) {
3057 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3061 if (started_vcpus == 0) {
3062 /* we're the only active VCPU -> speed it up */
3063 __enable_ibs_on_vcpu(vcpu);
3064 } else if (started_vcpus == 1) {
3066 * As we are starting a second VCPU, we have to disable
3067 * the IBS facility on all VCPUs to remove potentially
3068 * outstanding ENABLE requests.
3070 __disable_ibs_on_all_vcpus(vcpu->kvm);
3073 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3075 * Another VCPU might have used IBS while we were offline.
3076 * Let's play safe and flush the VCPU at startup.
3078 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3079 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/*
 * Transition a vcpu into the STOPPED state.  Counterpart of
 * kvm_s390_vcpu_start(): if exactly one vcpu remains running after
 * this stop, that vcpu gets IBS re-enabled to speed it up.
 */
3083 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3085 int i, online_vcpus, started_vcpus = 0;
3086 struct kvm_vcpu *started_vcpu = NULL;
3088 if (is_vcpu_stopped(vcpu))
3091 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3092 /* Only one cpu at a time may enter/leave the STOPPED state. */
3093 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3094 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3096 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3097 kvm_s390_clear_stop_irq(vcpu);
3099 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3100 __disable_ibs_on_vcpu(vcpu);
/* Find how many vcpus are still running, remembering the last one. */
3102 for (i = 0; i < online_vcpus; i++) {
3103 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3105 started_vcpu = vcpu->kvm->vcpus[i];
3109 if (started_vcpus == 1) {
3111 * As we only have one VCPU left, we want to enable the
3112 * IBS facility for that VCPU to speed it up.
3114 __enable_ibs_on_vcpu(started_vcpu);
3117 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/*
 * KVM_ENABLE_CAP handler for per-vcpu capabilities.  Only the CSS
 * support capability is visible in this excerpt; enabling it is
 * VM-wide and idempotent.
 */
3121 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3122 struct kvm_enable_cap *cap)
3130 case KVM_CAP_S390_CSS_SUPPORT:
3131 if (!vcpu->kvm->arch.css_support) {
3132 vcpu->kvm->arch.css_support = 1;
3133 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3134 trace_kvm_s390_enable_css(vcpu->kvm);
/*
 * KVM_S390_MEM_OP: read or write guest logical memory on behalf of
 * userspace, optionally only checking accessibility (CHECK_ONLY) and
 * optionally injecting the resulting program exception into the guest
 * (INJECT_EXCEPTION).  Size is capped at MEM_OP_MAX_SIZE.
 */
3145 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3146 struct kvm_s390_mem_op *mop)
3148 void __user *uaddr = (void __user *)mop->buf;
3149 void *tmpbuf = NULL;
3151 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3152 | KVM_S390_MEMOP_F_CHECK_ONLY;
/* Reject unknown flags and oversized transfers. */
3154 if (mop->flags & ~supported_flags)
3157 if (mop->size > MEM_OP_MAX_SIZE)
/* A bounce buffer is only needed for real transfers. */
3160 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3161 tmpbuf = vmalloc(mop->size);
3166 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3169 case KVM_S390_MEMOP_LOGICAL_READ:
3170 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3171 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3172 mop->size, GACC_FETCH);
3175 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3177 if (copy_to_user(uaddr, tmpbuf, mop->size))
3181 case KVM_S390_MEMOP_LOGICAL_WRITE:
3182 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3183 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3184 mop->size, GACC_STORE);
3187 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3191 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3197 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
/* r > 0 is a program interruption code from guest access. */
3199 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3200 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
/*
 * Per-vcpu ioctl dispatcher: interrupt injection, store-status,
 * initial PSW/reset, one-reg access, ucontrol address-space mapping,
 * capability enabling, guest memory ops and IRQ state save/restore.
 * Error-return lines after the copy_from_user checks are elided in
 * this excerpt.
 */
3206 long kvm_arch_vcpu_ioctl(struct file *filp,
3207 unsigned int ioctl, unsigned long arg)
3209 struct kvm_vcpu *vcpu = filp->private_data;
3210 void __user *argp = (void __user *)arg;
3215 case KVM_S390_IRQ: {
3216 struct kvm_s390_irq s390irq;
3219 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3221 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3224 case KVM_S390_INTERRUPT: {
/* Legacy interface: convert the old s390int format to an s390irq. */
3225 struct kvm_s390_interrupt s390int;
3226 struct kvm_s390_irq s390irq;
3229 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3231 if (s390int_to_s390irq(&s390int, &s390irq))
3233 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3236 case KVM_S390_STORE_STATUS:
3237 idx = srcu_read_lock(&vcpu->kvm->srcu);
3238 r = kvm_s390_vcpu_store_status(vcpu, arg);
3239 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3241 case KVM_S390_SET_INITIAL_PSW: {
3245 if (copy_from_user(&psw, argp, sizeof(psw)))
3247 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3250 case KVM_S390_INITIAL_RESET:
3251 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3253 case KVM_SET_ONE_REG:
3254 case KVM_GET_ONE_REG: {
3255 struct kvm_one_reg reg;
3257 if (copy_from_user(&reg, argp, sizeof(reg)))
3259 if (ioctl == KVM_SET_ONE_REG)
3260 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3262 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3265 #ifdef CONFIG_KVM_S390_UCONTROL
3266 case KVM_S390_UCAS_MAP: {
3267 struct kvm_s390_ucas_mapping ucasmap;
3269 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
/* ucas mapping is only valid for user-controlled VMs. */
3274 if (!kvm_is_ucontrol(vcpu->kvm)) {
3279 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3280 ucasmap.vcpu_addr, ucasmap.length);
3283 case KVM_S390_UCAS_UNMAP: {
3284 struct kvm_s390_ucas_mapping ucasmap;
3286 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3291 if (!kvm_is_ucontrol(vcpu->kvm)) {
3296 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3301 case KVM_S390_VCPU_FAULT: {
3302 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3305 case KVM_ENABLE_CAP:
3307 struct kvm_enable_cap cap;
3309 if (copy_from_user(&cap, argp, sizeof(cap)))
3311 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3314 case KVM_S390_MEM_OP: {
3315 struct kvm_s390_mem_op mem_op;
3317 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3318 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3323 case KVM_S390_SET_IRQ_STATE: {
3324 struct kvm_s390_irq_state irq_state;
3327 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
/* Buffer length must be a non-zero multiple of one irq record. */
3329 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3330 irq_state.len == 0 ||
3331 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3335 r = kvm_s390_set_irq_state(vcpu,
3336 (void __user *) irq_state.buf,
3340 case KVM_S390_GET_IRQ_STATE: {
3341 struct kvm_s390_irq_state irq_state;
3344 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3346 if (irq_state.len == 0) {
3350 r = kvm_s390_get_irq_state(vcpu,
3351 (__u8 __user *) irq_state.buf,
/*
 * mmap fault handler for the vcpu fd: for ucontrol VMs, map the SIE
 * control block page at KVM_S390_SIE_PAGE_OFFSET; everything else
 * gets SIGBUS.
 */
3361 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3363 #ifdef CONFIG_KVM_S390_UCONTROL
3364 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3365 && (kvm_is_ucontrol(vcpu->kvm))) {
3366 vmf->page = virt_to_page(vcpu->arch.sie_block);
3367 get_page(vmf->page);
3371 return VM_FAULT_SIGBUS;
/* No arch-specific memslot data needed on s390 (body elided here). */
3374 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3375 unsigned long npages)
3380 /* Section: memory related */
/*
 * Validate a memslot change before it is committed: userspace address
 * and size must be 1MB-segment aligned, and the slot must lie within
 * the VM's configured memory limit.  Error-return lines are elided in
 * this excerpt.
 */
3381 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3382 struct kvm_memory_slot *memslot,
3383 const struct kvm_userspace_memory_region *mem,
3384 enum kvm_mr_change change)
3386 /* A few sanity checks. We can have memory slots which have to be
3387 located/ended at a segment boundary (1MB). The memory in userland is
3388 ok to be fragmented into various different vmas. It is okay to mmap()
3389 and munmap() stuff in this slot after doing this call at any time */
3391 if (mem->userspace_addr & 0xffffful)
3394 if (mem->memory_size & 0xffffful)
3397 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
/*
 * Commit a memslot change by (re)mapping the userspace range into the
 * guest mapping (gmap), unless the slot's basics are unchanged.
 */
3403 void kvm_arch_commit_memory_region(struct kvm *kvm,
3404 const struct kvm_userspace_memory_region *mem,
3405 const struct kvm_memory_slot *old,
3406 const struct kvm_memory_slot *new,
3407 enum kvm_mr_change change)
3411 /* If the basics of the memslot do not change, we do not want
3412 * to update the gmap. Every update causes several unnecessary
3413 * segment translation exceptions. This is usually handled just
3414 * fine by the normal fault handler + gmap, but it will also
3415 * cause faults on the prefix page of running guest CPUs.
3417 if (old->userspace_addr == mem->userspace_addr &&
3418 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3419 old->npages * PAGE_SIZE == mem->memory_size)
3422 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3423 mem->guest_phys_addr, mem->memory_size);
/* Mapping failure is logged but cannot be propagated from here. */
3425 pr_warn("failed to commit memory region\n");
/*
 * Build a facility-list mask for doubleword @i from the SCLP hmfai
 * (host-managed facility) bits: extract the 2-bit field for this
 * doubleword and widen it into a right-shift of the base mask.
 */
3429 static inline unsigned long nonhyp_mask(int i)
3431 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3433 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
/* Reset the valid-wakeup marker when the vcpu finishes blocking. */
3436 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3438 vcpu->valid_wakeup = false;
/*
 * Module init: require the SIEF2 SIE facility, mask the facility list
 * by the host-managed (non-hypervisor) facilities, then register with
 * the generic KVM core.
 */
3441 static int __init kvm_s390_init(void)
3445 if (!sclp.has_sief2) {
3446 pr_info("SIE not available\n");
3450 for (i = 0; i < 16; i++)
3451 kvm_s390_fac_list_mask[i] |=
3452 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3454 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
/* Module exit: unregister from the KVM core (body elided here). */
3457 static void __exit kvm_s390_exit(void)
3462 module_init(kvm_s390_init);
3463 module_exit(kvm_s390_exit);
3466 * Enable autoloading of the kvm module.
3467 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3468 * since x86 takes a different approach.
3470 #include <linux/miscdevice.h>
3471 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3472 MODULE_ALIAS("devname:kvm");