/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 */
16 #include <linux/compiler.h>
17 #include <linux/err.h>
19 #include <linux/hrtimer.h>
20 #include <linux/init.h>
21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h>
23 #include <linux/module.h>
24 #include <linux/slab.h>
25 #include <linux/timer.h>
26 #include <asm/asm-offsets.h>
27 #include <asm/lowcore.h>
28 #include <asm/pgtable.h>
30 #include <asm/switch_to.h>
31 #include <asm/facility.h>
36 #define CREATE_TRACE_POINTS
38 #include "trace-s390.h"
40 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
42 struct kvm_stats_debugfs_item debugfs_entries[] = {
43 { "userspace_handled", VCPU_STAT(exit_userspace) },
44 { "exit_null", VCPU_STAT(exit_null) },
45 { "exit_validity", VCPU_STAT(exit_validity) },
46 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
47 { "exit_external_request", VCPU_STAT(exit_external_request) },
48 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
49 { "exit_instruction", VCPU_STAT(exit_instruction) },
50 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
51 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
52 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
53 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
54 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
55 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
56 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
57 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
58 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
59 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
60 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
61 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
62 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
63 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
64 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
65 { "instruction_spx", VCPU_STAT(instruction_spx) },
66 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
67 { "instruction_stap", VCPU_STAT(instruction_stap) },
68 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
69 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
70 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
71 { "instruction_essa", VCPU_STAT(instruction_essa) },
72 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
73 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
74 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
75 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
76 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
77 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
78 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
79 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
80 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
81 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
82 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
83 { "diagnose_10", VCPU_STAT(diagnose_10) },
84 { "diagnose_44", VCPU_STAT(diagnose_44) },
85 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
89 unsigned long *vfacilities;
90 static struct gmap_notifier gmap_notifier;
92 /* test availability of vfacility */
93 static inline int test_vfacility(unsigned long nr)
95 return __test_facility(nr, (void *) vfacilities);
98 /* Section: not file related */
99 int kvm_arch_hardware_enable(void *garbage)
101 /* every s390 is virtualization enabled ;-) */
105 void kvm_arch_hardware_disable(void *garbage)
109 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
111 int kvm_arch_hardware_setup(void)
113 gmap_notifier.notifier_call = kvm_gmap_notifier;
114 gmap_register_ipte_notifier(&gmap_notifier);
118 void kvm_arch_hardware_unsetup(void)
120 gmap_unregister_ipte_notifier(&gmap_notifier);
123 void kvm_arch_check_processor_compat(void *rtn)
127 int kvm_arch_init(void *opaque)
132 void kvm_arch_exit(void)
136 /* Section: device related */
137 long kvm_arch_dev_ioctl(struct file *filp,
138 unsigned int ioctl, unsigned long arg)
140 if (ioctl == KVM_S390_ENABLE_SIE)
141 return s390_enable_sie();
145 int kvm_dev_ioctl_check_extension(long ext)
150 case KVM_CAP_S390_PSW:
151 case KVM_CAP_S390_GMAP:
152 case KVM_CAP_SYNC_MMU:
153 #ifdef CONFIG_KVM_S390_UCONTROL
154 case KVM_CAP_S390_UCONTROL:
156 case KVM_CAP_ASYNC_PF:
157 case KVM_CAP_SYNC_REGS:
158 case KVM_CAP_ONE_REG:
159 case KVM_CAP_ENABLE_CAP:
160 case KVM_CAP_S390_CSS_SUPPORT:
162 case KVM_CAP_IOEVENTFD:
163 case KVM_CAP_DEVICE_CTRL:
164 case KVM_CAP_ENABLE_CAP_VM:
167 case KVM_CAP_NR_VCPUS:
168 case KVM_CAP_MAX_VCPUS:
171 case KVM_CAP_NR_MEMSLOTS:
172 r = KVM_USER_MEM_SLOTS;
174 case KVM_CAP_S390_COW:
175 r = MACHINE_HAS_ESOP;
183 /* Section: vm related */
185 * Get (and clear) the dirty memory log for a memory slot.
187 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
188 struct kvm_dirty_log *log)
193 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
201 case KVM_CAP_S390_IRQCHIP:
202 kvm->arch.use_irqchip = 1;
212 long kvm_arch_vm_ioctl(struct file *filp,
213 unsigned int ioctl, unsigned long arg)
215 struct kvm *kvm = filp->private_data;
216 void __user *argp = (void __user *)arg;
220 case KVM_S390_INTERRUPT: {
221 struct kvm_s390_interrupt s390int;
224 if (copy_from_user(&s390int, argp, sizeof(s390int)))
226 r = kvm_s390_inject_vm(kvm, &s390int);
229 case KVM_ENABLE_CAP: {
230 struct kvm_enable_cap cap;
232 if (copy_from_user(&cap, argp, sizeof(cap)))
234 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
237 case KVM_CREATE_IRQCHIP: {
238 struct kvm_irq_routing_entry routing;
241 if (kvm->arch.use_irqchip) {
242 /* Set up dummy routing. */
243 memset(&routing, 0, sizeof(routing));
244 kvm_set_irq_routing(kvm, &routing, 0, 0);
256 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
260 static unsigned long sca_offset;
263 #ifdef CONFIG_KVM_S390_UCONTROL
264 if (type & ~KVM_VM_S390_UCONTROL)
266 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
273 rc = s390_enable_sie();
279 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
282 spin_lock(&kvm_lock);
283 sca_offset = (sca_offset + 16) & 0x7f0;
284 kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
285 spin_unlock(&kvm_lock);
287 sprintf(debug_name, "kvm-%u", current->pid);
289 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
293 spin_lock_init(&kvm->arch.float_int.lock);
294 INIT_LIST_HEAD(&kvm->arch.float_int.list);
296 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
297 VM_EVENT(kvm, 3, "%s", "vm created");
299 if (type & KVM_VM_S390_UCONTROL) {
300 kvm->arch.gmap = NULL;
302 kvm->arch.gmap = gmap_alloc(current->mm);
305 kvm->arch.gmap->private = kvm;
306 kvm->arch.gmap->pfault_enabled = 0;
309 kvm->arch.css_support = 0;
310 kvm->arch.use_irqchip = 0;
314 debug_unregister(kvm->arch.dbf);
316 free_page((unsigned long)(kvm->arch.sca));
321 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
323 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
324 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
325 kvm_clear_async_pf_completion_queue(vcpu);
326 if (!kvm_is_ucontrol(vcpu->kvm)) {
327 clear_bit(63 - vcpu->vcpu_id,
328 (unsigned long *) &vcpu->kvm->arch.sca->mcn);
329 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
330 (__u64) vcpu->arch.sie_block)
331 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
335 if (kvm_is_ucontrol(vcpu->kvm))
336 gmap_free(vcpu->arch.gmap);
338 if (vcpu->arch.sie_block->cbrlo)
339 __free_page(__pfn_to_page(
340 vcpu->arch.sie_block->cbrlo >> PAGE_SHIFT));
341 free_page((unsigned long)(vcpu->arch.sie_block));
343 kvm_vcpu_uninit(vcpu);
344 kmem_cache_free(kvm_vcpu_cache, vcpu);
347 static void kvm_free_vcpus(struct kvm *kvm)
350 struct kvm_vcpu *vcpu;
352 kvm_for_each_vcpu(i, vcpu, kvm)
353 kvm_arch_vcpu_destroy(vcpu);
355 mutex_lock(&kvm->lock);
356 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
357 kvm->vcpus[i] = NULL;
359 atomic_set(&kvm->online_vcpus, 0);
360 mutex_unlock(&kvm->lock);
363 void kvm_arch_sync_events(struct kvm *kvm)
367 void kvm_arch_destroy_vm(struct kvm *kvm)
370 free_page((unsigned long)(kvm->arch.sca));
371 debug_unregister(kvm->arch.dbf);
372 if (!kvm_is_ucontrol(kvm))
373 gmap_free(kvm->arch.gmap);
374 kvm_s390_destroy_adapters(kvm);
377 /* Section: vcpu related */
378 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
380 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
381 kvm_clear_async_pf_completion_queue(vcpu);
382 if (kvm_is_ucontrol(vcpu->kvm)) {
383 vcpu->arch.gmap = gmap_alloc(current->mm);
384 if (!vcpu->arch.gmap)
386 vcpu->arch.gmap->private = vcpu->kvm;
390 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
391 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
398 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
403 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
405 save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
406 save_fp_regs(vcpu->arch.host_fpregs.fprs);
407 save_access_regs(vcpu->arch.host_acrs);
408 restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
409 restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
410 restore_access_regs(vcpu->run->s.regs.acrs);
411 gmap_enable(vcpu->arch.gmap);
412 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
415 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
417 atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
418 gmap_disable(vcpu->arch.gmap);
419 save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
420 save_fp_regs(vcpu->arch.guest_fpregs.fprs);
421 save_access_regs(vcpu->run->s.regs.acrs);
422 restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
423 restore_fp_regs(vcpu->arch.host_fpregs.fprs);
424 restore_access_regs(vcpu->arch.host_acrs);
427 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
429 /* this equals initial cpu reset in pop, but we don't switch to ESA */
430 vcpu->arch.sie_block->gpsw.mask = 0UL;
431 vcpu->arch.sie_block->gpsw.addr = 0UL;
432 kvm_s390_set_prefix(vcpu, 0);
433 vcpu->arch.sie_block->cputm = 0UL;
434 vcpu->arch.sie_block->ckc = 0UL;
435 vcpu->arch.sie_block->todpr = 0;
436 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
437 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
438 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
439 vcpu->arch.guest_fpregs.fpc = 0;
440 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
441 vcpu->arch.sie_block->gbea = 1;
442 vcpu->arch.sie_block->pp = 0;
443 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
444 kvm_clear_async_pf_completion_queue(vcpu);
445 atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
446 kvm_s390_clear_local_irqs(vcpu);
449 int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
454 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
458 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
462 vcpu->arch.sie_block->ecb = 6;
463 if (test_vfacility(50) && test_vfacility(73))
464 vcpu->arch.sie_block->ecb |= 0x10;
466 vcpu->arch.sie_block->ecb2 = 8;
467 vcpu->arch.sie_block->eca = 0xC1002001U;
468 vcpu->arch.sie_block->fac = (int) (long) vfacilities;
469 if (kvm_enabled_cmma()) {
470 cbrl = alloc_page(GFP_KERNEL | __GFP_ZERO);
472 vcpu->arch.sie_block->ecb2 |= 0x80;
473 vcpu->arch.sie_block->ecb2 &= ~0x08;
474 vcpu->arch.sie_block->cbrlo = page_to_phys(cbrl);
477 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
478 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
479 (unsigned long) vcpu);
480 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
481 get_cpu_id(&vcpu->arch.cpu_id);
482 vcpu->arch.cpu_id.version = 0xff;
486 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
489 struct kvm_vcpu *vcpu;
490 struct sie_page *sie_page;
493 if (id >= KVM_MAX_VCPUS)
498 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
502 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
506 vcpu->arch.sie_block = &sie_page->sie_block;
507 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
509 vcpu->arch.sie_block->icpua = id;
510 if (!kvm_is_ucontrol(kvm)) {
511 if (!kvm->arch.sca) {
515 if (!kvm->arch.sca->cpu[id].sda)
516 kvm->arch.sca->cpu[id].sda =
517 (__u64) vcpu->arch.sie_block;
518 vcpu->arch.sie_block->scaoh =
519 (__u32)(((__u64)kvm->arch.sca) >> 32);
520 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
521 set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
524 spin_lock_init(&vcpu->arch.local_int.lock);
525 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
526 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
527 vcpu->arch.local_int.wq = &vcpu->wq;
528 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
530 rc = kvm_vcpu_init(vcpu, kvm, id);
532 goto out_free_sie_block;
533 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
534 vcpu->arch.sie_block);
535 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
539 free_page((unsigned long)(vcpu->arch.sie_block));
541 kmem_cache_free(kvm_vcpu_cache, vcpu);
546 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
548 return kvm_cpu_has_interrupt(vcpu);
551 void s390_vcpu_block(struct kvm_vcpu *vcpu)
553 atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
556 void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
558 atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
562 * Kick a guest cpu out of SIE and wait until SIE is not running.
563 * If the CPU is not running (e.g. waiting as idle) the function will
564 * return immediately. */
565 void exit_sie(struct kvm_vcpu *vcpu)
567 atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
568 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
/* Kick a guest cpu out of SIE and prevent SIE-reentry */
void exit_sie_sync(struct kvm_vcpu *vcpu)
{
	/* block first so the vcpu cannot slip back into SIE before we kick it */
	s390_vcpu_block(vcpu);
	exit_sie(vcpu);
}
579 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
582 struct kvm *kvm = gmap->private;
583 struct kvm_vcpu *vcpu;
585 kvm_for_each_vcpu(i, vcpu, kvm) {
586 /* match against both prefix pages */
587 if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
588 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
589 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
595 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
597 /* kvm common code refers to this, but never calls it */
602 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
603 struct kvm_one_reg *reg)
608 case KVM_REG_S390_TODPR:
609 r = put_user(vcpu->arch.sie_block->todpr,
610 (u32 __user *)reg->addr);
612 case KVM_REG_S390_EPOCHDIFF:
613 r = put_user(vcpu->arch.sie_block->epoch,
614 (u64 __user *)reg->addr);
616 case KVM_REG_S390_CPU_TIMER:
617 r = put_user(vcpu->arch.sie_block->cputm,
618 (u64 __user *)reg->addr);
620 case KVM_REG_S390_CLOCK_COMP:
621 r = put_user(vcpu->arch.sie_block->ckc,
622 (u64 __user *)reg->addr);
624 case KVM_REG_S390_PFTOKEN:
625 r = put_user(vcpu->arch.pfault_token,
626 (u64 __user *)reg->addr);
628 case KVM_REG_S390_PFCOMPARE:
629 r = put_user(vcpu->arch.pfault_compare,
630 (u64 __user *)reg->addr);
632 case KVM_REG_S390_PFSELECT:
633 r = put_user(vcpu->arch.pfault_select,
634 (u64 __user *)reg->addr);
636 case KVM_REG_S390_PP:
637 r = put_user(vcpu->arch.sie_block->pp,
638 (u64 __user *)reg->addr);
640 case KVM_REG_S390_GBEA:
641 r = put_user(vcpu->arch.sie_block->gbea,
642 (u64 __user *)reg->addr);
651 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
652 struct kvm_one_reg *reg)
657 case KVM_REG_S390_TODPR:
658 r = get_user(vcpu->arch.sie_block->todpr,
659 (u32 __user *)reg->addr);
661 case KVM_REG_S390_EPOCHDIFF:
662 r = get_user(vcpu->arch.sie_block->epoch,
663 (u64 __user *)reg->addr);
665 case KVM_REG_S390_CPU_TIMER:
666 r = get_user(vcpu->arch.sie_block->cputm,
667 (u64 __user *)reg->addr);
669 case KVM_REG_S390_CLOCK_COMP:
670 r = get_user(vcpu->arch.sie_block->ckc,
671 (u64 __user *)reg->addr);
673 case KVM_REG_S390_PFTOKEN:
674 r = get_user(vcpu->arch.pfault_token,
675 (u64 __user *)reg->addr);
677 case KVM_REG_S390_PFCOMPARE:
678 r = get_user(vcpu->arch.pfault_compare,
679 (u64 __user *)reg->addr);
681 case KVM_REG_S390_PFSELECT:
682 r = get_user(vcpu->arch.pfault_select,
683 (u64 __user *)reg->addr);
685 case KVM_REG_S390_PP:
686 r = get_user(vcpu->arch.sie_block->pp,
687 (u64 __user *)reg->addr);
689 case KVM_REG_S390_GBEA:
690 r = get_user(vcpu->arch.sie_block->gbea,
691 (u64 __user *)reg->addr);
700 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
702 kvm_s390_vcpu_initial_reset(vcpu);
706 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
708 memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs));
712 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
714 memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
718 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
719 struct kvm_sregs *sregs)
721 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
722 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
723 restore_access_regs(vcpu->run->s.regs.acrs);
727 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
728 struct kvm_sregs *sregs)
730 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
731 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
735 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
737 if (test_fp_ctl(fpu->fpc))
739 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
740 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
741 restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
742 restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
746 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
748 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
749 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
753 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
757 if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
760 vcpu->run->psw_mask = psw.mask;
761 vcpu->run->psw_addr = psw.addr;
766 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
767 struct kvm_translation *tr)
769 return -EINVAL; /* not implemented yet */
772 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
773 struct kvm_guest_debug *dbg)
775 return -EINVAL; /* not implemented yet */
778 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
779 struct kvm_mp_state *mp_state)
781 return -EINVAL; /* not implemented yet */
784 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
785 struct kvm_mp_state *mp_state)
787 return -EINVAL; /* not implemented yet */
790 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
793 * We use MMU_RELOAD just to re-arm the ipte notifier for the
794 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
795 * This ensures that the ipte instruction for this request has
796 * already finished. We might race against a second unmapper that
797 * wants to set the blocking bit. Lets just retry the request loop.
799 while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
801 rc = gmap_ipte_notify(vcpu->arch.gmap,
802 vcpu->arch.sie_block->prefix,
806 s390_vcpu_unblock(vcpu);
811 static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
814 hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
815 struct mm_struct *mm = current->mm;
816 down_read(&mm->mmap_sem);
817 rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
818 up_read(&mm->mmap_sem);
822 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
825 struct kvm_s390_interrupt inti;
829 inti.type = KVM_S390_INT_PFAULT_INIT;
830 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
832 inti.type = KVM_S390_INT_PFAULT_DONE;
833 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
837 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
838 struct kvm_async_pf *work)
840 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
841 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
844 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
845 struct kvm_async_pf *work)
847 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
848 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
851 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
852 struct kvm_async_pf *work)
854 /* s390 will always inject the page directly */
857 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
860 * s390 will always inject the page directly,
861 * but we still want check_async_completion to cleanup
866 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
869 struct kvm_arch_async_pf arch;
872 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
874 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
875 vcpu->arch.pfault_compare)
877 if (psw_extint_disabled(vcpu))
879 if (kvm_cpu_has_interrupt(vcpu))
881 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
883 if (!vcpu->arch.gmap->pfault_enabled)
886 hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
887 if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
890 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
894 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
899 * On s390 notifications for arriving pages will be delivered directly
900 * to the guest but the house keeping for completed pfaults is
901 * handled outside the worker.
903 kvm_check_async_pf_completion(vcpu);
905 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
910 if (test_cpu_flag(CIF_MCCK_PENDING))
913 if (!kvm_is_ucontrol(vcpu->kvm))
914 kvm_s390_deliver_pending_interrupts(vcpu);
916 rc = kvm_s390_handle_requests(vcpu);
920 vcpu->arch.sie_block->icptcode = 0;
921 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
922 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
923 trace_kvm_s390_sie_enter(vcpu, cpuflags);
928 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
932 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
933 vcpu->arch.sie_block->icptcode);
934 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
936 if (exit_reason >= 0) {
938 } else if (kvm_is_ucontrol(vcpu->kvm)) {
939 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
940 vcpu->run->s390_ucontrol.trans_exc_code =
941 current->thread.gmap_addr;
942 vcpu->run->s390_ucontrol.pgm_code = 0x10;
945 } else if (current->thread.gmap_pfault) {
946 trace_kvm_s390_major_guest_pfault(vcpu);
947 current->thread.gmap_pfault = 0;
948 if (kvm_arch_setup_async_pf(vcpu) ||
949 (kvm_arch_fault_in_sync(vcpu) >= 0))
954 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
955 trace_kvm_s390_sie_fault(vcpu);
956 rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
959 memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
962 if (kvm_is_ucontrol(vcpu->kvm))
963 /* Don't exit for host interrupts. */
964 rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
966 rc = kvm_handle_sie_intercept(vcpu);
972 bool kvm_enabled_cmma(void)
974 if (!MACHINE_IS_LPAR)
976 /* only enable for z10 and later */
977 if (!MACHINE_HAS_EDAT1)
982 static int __vcpu_run(struct kvm_vcpu *vcpu)
987 * We try to hold kvm->srcu during most of vcpu_run (except when run-
988 * ning the guest), so that memslots (and other stuff) are protected
990 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
993 rc = vcpu_pre_run(vcpu);
997 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
999 * As PF_VCPU will be used in fault handler, between
1000 * guest_enter and guest_exit should be no uaccess.
1005 exit_reason = sie64a(vcpu->arch.sie_block,
1006 vcpu->run->s.regs.gprs);
1008 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1010 rc = vcpu_post_run(vcpu, exit_reason);
1011 } while (!signal_pending(current) && !rc);
1013 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
1017 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1022 if (vcpu->sigset_active)
1023 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
1025 atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
1027 switch (kvm_run->exit_reason) {
1028 case KVM_EXIT_S390_SIEIC:
1029 case KVM_EXIT_UNKNOWN:
1031 case KVM_EXIT_S390_RESET:
1032 case KVM_EXIT_S390_UCONTROL:
1033 case KVM_EXIT_S390_TSCH:
1039 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
1040 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
1041 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
1042 kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
1043 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
1045 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
1046 kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
1047 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
1048 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
1052 rc = __vcpu_run(vcpu);
1054 if (signal_pending(current) && !rc) {
1055 kvm_run->exit_reason = KVM_EXIT_INTR;
1059 if (rc == -EOPNOTSUPP) {
1060 /* intercept cannot be handled in-kernel, prepare kvm-run */
1061 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
1062 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
1063 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
1064 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
1068 if (rc == -EREMOTE) {
1069 /* intercept was handled, but userspace support is needed
1070 * kvm_run has been prepared by the handler */
1074 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
1075 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
1076 kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix;
1077 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
1079 if (vcpu->sigset_active)
1080 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1082 vcpu->stat.exit_userspace++;
1086 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
1087 unsigned long n, int prefix)
1090 return copy_to_guest(vcpu, guestdest, from, n);
1092 return copy_to_guest_absolute(vcpu, guestdest, from, n);
1096 * store status at address
1097 * we use have two special cases:
1098 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
1099 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
1101 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
1103 unsigned char archmode = 1;
1107 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
1108 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
1110 addr = SAVE_AREA_BASE;
1112 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
1113 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
1115 addr = SAVE_AREA_BASE;
1120 if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
1121 vcpu->arch.guest_fpregs.fprs, 128, prefix))
1124 if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
1125 vcpu->run->s.regs.gprs, 128, prefix))
1128 if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
1129 &vcpu->arch.sie_block->gpsw, 16, prefix))
1132 if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
1133 &vcpu->arch.sie_block->prefix, 4, prefix))
1136 if (__guestcopy(vcpu,
1137 addr + offsetof(struct save_area, fp_ctrl_reg),
1138 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
1141 if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
1142 &vcpu->arch.sie_block->todpr, 4, prefix))
1145 if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
1146 &vcpu->arch.sie_block->cputm, 8, prefix))
1149 clkcomp = vcpu->arch.sie_block->ckc >> 8;
1150 if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
1151 &clkcomp, 8, prefix))
1154 if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
1155 &vcpu->run->s.regs.acrs, 64, prefix))
1158 if (__guestcopy(vcpu,
1159 addr + offsetof(struct save_area, ctrl_regs),
1160 &vcpu->arch.sie_block->gcr, 128, prefix))
1165 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
1168 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
1169 * copying in vcpu load/put. Lets update our copies before we save
1170 * it into the save area
1172 save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
1173 save_fp_regs(vcpu->arch.guest_fpregs.fprs);
1174 save_access_regs(vcpu->run->s.regs.acrs);
1176 return kvm_s390_store_status_unloaded(vcpu, addr);
1179 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
1180 struct kvm_enable_cap *cap)
1188 case KVM_CAP_S390_CSS_SUPPORT:
1189 if (!vcpu->kvm->arch.css_support) {
1190 vcpu->kvm->arch.css_support = 1;
1191 trace_kvm_s390_enable_css(vcpu->kvm);
1202 long kvm_arch_vcpu_ioctl(struct file *filp,
1203 unsigned int ioctl, unsigned long arg)
1205 struct kvm_vcpu *vcpu = filp->private_data;
1206 void __user *argp = (void __user *)arg;
1211 case KVM_S390_INTERRUPT: {
1212 struct kvm_s390_interrupt s390int;
1215 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1217 r = kvm_s390_inject_vcpu(vcpu, &s390int);
1220 case KVM_S390_STORE_STATUS:
1221 idx = srcu_read_lock(&vcpu->kvm->srcu);
1222 r = kvm_s390_vcpu_store_status(vcpu, arg);
1223 srcu_read_unlock(&vcpu->kvm->srcu, idx);
1225 case KVM_S390_SET_INITIAL_PSW: {
1229 if (copy_from_user(&psw, argp, sizeof(psw)))
1231 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
1234 case KVM_S390_INITIAL_RESET:
1235 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
1237 case KVM_SET_ONE_REG:
1238 case KVM_GET_ONE_REG: {
1239 struct kvm_one_reg reg;
1241 if (copy_from_user(®, argp, sizeof(reg)))
1243 if (ioctl == KVM_SET_ONE_REG)
1244 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, ®);
1246 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, ®);
1249 #ifdef CONFIG_KVM_S390_UCONTROL
1250 case KVM_S390_UCAS_MAP: {
1251 struct kvm_s390_ucas_mapping ucasmap;
1253 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
1258 if (!kvm_is_ucontrol(vcpu->kvm)) {
1263 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
1264 ucasmap.vcpu_addr, ucasmap.length);
1267 case KVM_S390_UCAS_UNMAP: {
1268 struct kvm_s390_ucas_mapping ucasmap;
1270 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
1275 if (!kvm_is_ucontrol(vcpu->kvm)) {
1280 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
1285 case KVM_S390_VCPU_FAULT: {
1286 r = gmap_fault(arg, vcpu->arch.gmap);
1287 if (!IS_ERR_VALUE(r))
1291 case KVM_ENABLE_CAP:
1293 struct kvm_enable_cap cap;
1295 if (copy_from_user(&cap, argp, sizeof(cap)))
1297 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
1306 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
1308 #ifdef CONFIG_KVM_S390_UCONTROL
1309 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
1310 && (kvm_is_ucontrol(vcpu->kvm))) {
1311 vmf->page = virt_to_page(vcpu->arch.sie_block);
1312 get_page(vmf->page);
1316 return VM_FAULT_SIGBUS;
1319 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
1320 struct kvm_memory_slot *dont)
1324 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
1325 unsigned long npages)
1330 void kvm_arch_memslots_updated(struct kvm *kvm)
1334 /* Section: memory related */
1335 int kvm_arch_prepare_memory_region(struct kvm *kvm,
1336 struct kvm_memory_slot *memslot,
1337 struct kvm_userspace_memory_region *mem,
1338 enum kvm_mr_change change)
1340 /* A few sanity checks. We can have memory slots which have to be
1341 located/ended at a segment boundary (1MB). The memory in userland is
1342 ok to be fragmented into various different vmas. It is okay to mmap()
1343 and munmap() stuff in this slot after doing this call at any time */
1345 if (mem->userspace_addr & 0xffffful)
1348 if (mem->memory_size & 0xffffful)
1354 void kvm_arch_commit_memory_region(struct kvm *kvm,
1355 struct kvm_userspace_memory_region *mem,
1356 const struct kvm_memory_slot *old,
1357 enum kvm_mr_change change)
1361 /* If the basics of the memslot do not change, we do not want
1362 * to update the gmap. Every update causes several unnecessary
1363 * segment translation exceptions. This is usually handled just
1364 * fine by the normal fault handler + gmap, but it will also
1365 * cause faults on the prefix page of running guest CPUs.
1367 if (old->userspace_addr == mem->userspace_addr &&
1368 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
1369 old->npages * PAGE_SIZE == mem->memory_size)
1372 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
1373 mem->guest_phys_addr, mem->memory_size);
1375 printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
1379 void kvm_arch_flush_shadow_all(struct kvm *kvm)
1383 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
1384 struct kvm_memory_slot *slot)
1388 static int __init kvm_s390_init(void)
1391 ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
1396 * guests can ask for up to 255+1 double words, we need a full page
1397 * to hold the maximum amount of facilities. On the other hand, we
1398 * only set facilities that are known to work in KVM.
1400 vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
1405 memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
1406 vfacilities[0] &= 0xff82fff3f4fc2000UL;
1407 vfacilities[1] &= 0x005c000000000000UL;
1411 static void __exit kvm_s390_exit(void)
1413 free_page((unsigned long) vfacilities);
1417 module_init(kvm_s390_init);
1418 module_exit(kvm_s390_exit);
1421 * Enable autoloading of the kvm module.
1422 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
1423 * since x86 takes a different approach.
1425 #include <linux/miscdevice.h>
1426 MODULE_ALIAS_MISCDEV(KVM_MINOR);
1427 MODULE_ALIAS("devname:kvm");