2 * s390host.c -- hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008,2009
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
16 #include <linux/compiler.h>
17 #include <linux/err.h>
19 #include <linux/hrtimer.h>
20 #include <linux/init.h>
21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h>
23 #include <linux/module.h>
24 #include <linux/slab.h>
25 #include <linux/timer.h>
26 #include <asm/lowcore.h>
27 #include <asm/pgtable.h>
29 #include <asm/system.h>
/* Expands to the offset/type pair common KVM code expects for a vcpu stat. */
33 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/*
 * Per-vcpu event counters exported through debugfs: each entry maps a
 * user-visible file name to a counter field in struct kvm_vcpu's stat
 * area.  NOTE(review): the terminating entry and closing brace of this
 * array are elided from this excerpt.
 */
35 struct kvm_stats_debugfs_item debugfs_entries[] = {
36 { "userspace_handled", VCPU_STAT(exit_userspace) },
37 { "exit_null", VCPU_STAT(exit_null) },
38 { "exit_validity", VCPU_STAT(exit_validity) },
39 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
40 { "exit_external_request", VCPU_STAT(exit_external_request) },
41 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
42 { "exit_instruction", VCPU_STAT(exit_instruction) },
43 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
44 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
45 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
46 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
47 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
48 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
49 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
50 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
51 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
52 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
53 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
54 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
55 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
56 { "instruction_spx", VCPU_STAT(instruction_spx) },
57 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
58 { "instruction_stap", VCPU_STAT(instruction_stap) },
59 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
60 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
61 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
62 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
63 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
64 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
65 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
66 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
67 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
68 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
69 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
70 { "diagnose_44", VCPU_STAT(diagnose_44) },
/*
 * Host facility bits forwarded to guests.  Allocated as a zeroed DMA page
 * in kvm_s390_init(), filled via stfle() and masked down to the bits KVM
 * is known to handle; freed again in kvm_s390_exit().
 */
74 static unsigned long long *facilities;
76 /* Section: not file related */
/*
 * Architecture hooks required by common KVM code.  On s390 there is no
 * hardware enable/disable dance to perform.
 * NOTE(review): the function bodies are elided from this excerpt; they
 * appear to be empty or trivial stubs.
 */
77 void kvm_arch_hardware_enable(void *garbage)
79 /* every s390 is virtualization enabled ;-) */
82 void kvm_arch_hardware_disable(void *garbage)
86 int kvm_arch_hardware_setup(void)
91 void kvm_arch_hardware_unsetup(void)
95 void kvm_arch_check_processor_compat(void *rtn)
99 int kvm_arch_init(void *opaque)
104 void kvm_arch_exit(void)
108 /* Section: device related */
/*
 * ioctl handler for the /dev/kvm device node.  The only device-level
 * ioctl supported here is KVM_S390_ENABLE_SIE, which switches the
 * calling process's address space for SIE use via s390_enable_sie().
 */
109 long kvm_arch_dev_ioctl(struct file *filp,
110 unsigned int ioctl, unsigned long arg)
112 if (ioctl == KVM_S390_ENABLE_SIE)
113 return s390_enable_sie();
/*
 * Report which optional KVM capabilities this architecture supports.
 * KVM_CAP_S390_PSW is among the advertised extensions.
 * NOTE(review): the switch body and return paths are elided here.
 */
117 int kvm_dev_ioctl_check_extension(long ext)
122 case KVM_CAP_S390_PSW:
131 /* Section: vm related */
/*
133 * Get (and clear) the dirty memory log for a memory slot.
 */
135 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
136 struct kvm_dirty_log *log)
/*
 * Dispatch vm-level ioctls.  KVM_S390_INTERRUPT copies a
 * struct kvm_s390_interrupt from userspace and injects it as a
 * floating interrupt via kvm_s390_inject_vm().
 * NOTE(review): the switch statement, error handling and return are
 * elided from this excerpt.
 */
141 long kvm_arch_vm_ioctl(struct file *filp,
142 unsigned int ioctl, unsigned long arg)
144 struct kvm *kvm = filp->private_data;
145 void __user *argp = (void __user *)arg;
149 case KVM_S390_INTERRUPT: {
150 struct kvm_s390_interrupt s390int;
153 if (copy_from_user(&s390int, argp, sizeof(s390int)))
155 r = kvm_s390_inject_vm(kvm, &s390int);
/*
 * Allocate and initialize a new VM: enable SIE for the current mm,
 * allocate the kvm structure and the system control area (SCA), set up
 * the per-VM s390 debug feature log ("kvm-<pid>") and the floating
 * interrupt list/lock.  The trailing free_page() is part of the error
 * unwind path (labels elided from this excerpt).
 */
165 struct kvm *kvm_arch_create_vm(void)
171 rc = s390_enable_sie();
176 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
180 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
184 sprintf(debug_name, "kvm-%u", current->pid);
186 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
190 spin_lock_init(&kvm->arch.float_int.lock);
191 INIT_LIST_HEAD(&kvm->arch.float_int.list);
193 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
194 VM_EVENT(kvm, 3, "%s", "vm created");
198 free_page((unsigned long)(kvm->arch.sca));
/*
 * Tear down one vcpu: clear its SCA descriptor entry (only if it still
 * points at this vcpu's SIE control block), free the SIE block page and
 * uninitialize the common vcpu state.
 */
205 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
207 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
208 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
209 (__u64) vcpu->arch.sie_block)
210 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
212 free_page((unsigned long)(vcpu->arch.sie_block));
213 kvm_vcpu_uninit(vcpu);
/*
 * Destroy every vcpu of the VM, then (under kvm->lock) clear the vcpus
 * array and reset the online vcpu count so common code sees none left.
 */
217 static void kvm_free_vcpus(struct kvm *kvm)
220 struct kvm_vcpu *vcpu;
222 kvm_for_each_vcpu(i, vcpu, kvm)
223 kvm_arch_vcpu_destroy(vcpu);
225 mutex_lock(&kvm->lock);
226 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
227 kvm->vcpus[i] = NULL;
229 atomic_set(&kvm->online_vcpus, 0);
230 mutex_unlock(&kvm->lock);
/* Hook called by common code before VM destruction; body elided (no-op). */
233 void kvm_arch_sync_events(struct kvm *kvm)
/*
 * Release all VM resources: guest memory, the SCA page and the per-VM
 * debug feature registration.
 */
237 void kvm_arch_destroy_vm(struct kvm *kvm)
240 kvm_free_physmem(kvm);
241 free_page((unsigned long)(kvm->arch.sca));
242 debug_unregister(kvm->arch.dbf);
246 /* Section: vcpu related */
/* Per-vcpu init/uninit hooks; bodies elided (nothing arch-specific here). */
247 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
252 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
/*
 * Called when the vcpu is scheduled onto a host cpu: save the host's FP
 * and access registers, then install the guest's copies.  The guest FPC
 * is masked to valid bits before the restore to avoid loading an
 * illegal FP control value.
 */
257 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
259 save_fp_regs(&vcpu->arch.host_fpregs);
260 save_access_regs(vcpu->arch.host_acrs);
261 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
262 restore_fp_regs(&vcpu->arch.guest_fpregs);
263 restore_access_regs(vcpu->arch.guest_acrs);
/*
 * Called when the vcpu is scheduled off a host cpu: stash the guest's FP
 * and access registers and restore the host's copies (mirror image of
 * kvm_arch_vcpu_load()).
 */
266 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
268 save_fp_regs(&vcpu->arch.guest_fpregs);
269 save_access_regs(vcpu->arch.guest_acrs);
270 restore_fp_regs(&vcpu->arch.host_fpregs);
271 restore_access_regs(vcpu->arch.host_acrs);
/*
 * Perform the architected initial cpu reset on this vcpu's SIE control
 * block: clear PSW, prefix, cpu timer and clock comparator, invalidate
 * the interception-handling cpu field (0xffff), and load the reset
 * values into control registers 0 and 14 and the guest FPC.
 */
274 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
276 /* this equals initial cpu reset in pop, but we don't switch to ESA */
277 vcpu->arch.sie_block->gpsw.mask = 0UL;
278 vcpu->arch.sie_block->gpsw.addr = 0UL;
279 vcpu->arch.sie_block->prefix = 0UL;
280 vcpu->arch.sie_block->ihcpu = 0xffff;
281 vcpu->arch.sie_block->cputm = 0UL;
282 vcpu->arch.sie_block->ckc = 0UL;
283 vcpu->arch.sie_block->todpr = 0;
284 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
/* CR0/CR14 reset values enable the interrupt subclasses KVM relies on. */
285 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
286 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
287 vcpu->arch.guest_fpregs.fpc = 0;
/* Also load the cleared FPC into the real FP control register. */
288 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
289 vcpu->arch.sie_block->gbea = 1;
/*
 * One-time setup of a freshly created vcpu: mark it z/Architecture
 * capable, request an initial memory (re)load, configure the SIE
 * execution controls (ecb/eca), point the SIE block at the shared
 * facility list, and initialize the clock-comparator hrtimer plus the
 * tasklet that delivers its wakeups.
 */
292 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
294 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
295 set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests);
296 vcpu->arch.sie_block->ecb = 2;
297 vcpu->arch.sie_block->eca = 0xC1002001U;
298 vcpu->arch.sie_block->fac = (int) (long) facilities;
299 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
300 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
301 (unsigned long) vcpu);
302 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
303 get_cpu_id(&vcpu->arch.cpu_id);
/* Tag the reported cpu version so guests can recognize virtual cpus. */
304 vcpu->arch.cpu_id.version = 0xff;
/*
 * Allocate a vcpu and its SIE control block, register the block in the
 * VM's SCA slot for this cpu id, wire the vcpu into the floating
 * interrupt structure (under float_int.lock), and finish with common
 * kvm_vcpu_init().  The trailing free_page() is the error unwind for a
 * failed kvm_vcpu_init() (labels elided from this excerpt).
 */
308 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
311 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
317 vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
318 get_zeroed_page(GFP_KERNEL);
320 if (!vcpu->arch.sie_block)
323 vcpu->arch.sie_block->icpua = id;
324 BUG_ON(!kvm->arch.sca);
/* Only claim the SCA slot if no earlier vcpu with this id owns it. */
325 if (!kvm->arch.sca->cpu[id].sda)
326 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
327 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
328 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
330 spin_lock_init(&vcpu->arch.local_int.lock);
331 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
332 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
333 spin_lock(&kvm->arch.float_int.lock);
334 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
335 init_waitqueue_head(&vcpu->arch.local_int.wq);
336 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
337 spin_unlock(&kvm->arch.float_int.lock);
339 rc = kvm_vcpu_init(vcpu, kvm, id);
341 goto out_free_sie_block;
342 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
343 vcpu->arch.sie_block);
347 free_page((unsigned long)(vcpu->arch.sie_block));
/* Required by common KVM code on other arches; unused on s390. */
354 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
356 /* kvm common code refers to this, but never calls it */
/* KVM_S390_INITIAL_RESET backend: delegate to the initial cpu reset. */
361 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
364 kvm_s390_vcpu_initial_reset(vcpu);
369 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
372 memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs));
377 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
380 memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
/*
 * KVM_SET_SREGS backend: install the access registers and the guest
 * control registers (SIE block gcr array) supplied by userspace.
 */
385 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
386 struct kvm_sregs *sregs)
389 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
390 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/*
 * KVM_GET_SREGS backend: report the guest access registers and control
 * registers back to userspace.
 */
395 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
396 struct kvm_sregs *sregs)
399 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
400 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/*
 * KVM_SET_FPU backend: install the guest floating point registers and
 * FP control word from userspace.
 */
405 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
408 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
409 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
/*
 * KVM_GET_FPU backend: report the guest floating point registers and FP
 * control word to userspace.
 */
414 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
417 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
418 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
/*
 * KVM_S390_SET_INITIAL_PSW backend: store the caller-supplied PSW in
 * the shared kvm_run area, but only while the vcpu is not running —
 * a running vcpu (CPUSTAT_RUNNING) rejects the request (error path
 * elided from this excerpt).
 */
423 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
428 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
431 vcpu->run->psw_mask = psw.mask;
432 vcpu->run->psw_addr = psw.addr;
/*
 * Common-code entry points with no s390 implementation yet; each simply
 * reports -EINVAL to the caller.
 */
438 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
439 struct kvm_translation *tr)
441 return -EINVAL; /* not implemented yet */
444 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
445 struct kvm_guest_debug *dbg)
447 return -EINVAL; /* not implemented yet */
450 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
451 struct kvm_mp_state *mp_state)
453 return -EINVAL; /* not implemented yet */
456 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
457 struct kvm_mp_state *mp_state)
459 return -EINVAL; /* not implemented yet */
/*
 * Run the vcpu once through the SIE instruction: copy gprs 14/15 into
 * the SIE block, deliver pending interrupts, enter SIE via sie64a(),
 * and on a fault inject an addressing exception into the guest.  The
 * gpr 14/15 copies are mirrored back afterwards.
 * NOTE(review): irq enable/disable and machine-check handling lines
 * appear to be elided from this excerpt.
 */
462 static void __vcpu_run(struct kvm_vcpu *vcpu)
/* gg14 holds guest gprs 14 and 15 (16 bytes) inside the SIE block. */
464 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
469 if (test_thread_flag(TIF_MCCK_PENDING))
472 kvm_s390_deliver_pending_interrupts(vcpu);
474 vcpu->arch.sie_block->icptcode = 0;
478 VCPU_EVENT(vcpu, 6, "entering sie flags %x",
479 atomic_read(&vcpu->arch.sie_block->cpuflags));
480 if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
481 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
482 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
484 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
485 vcpu->arch.sie_block->icptcode);
490 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
/*
 * KVM_RUN backend: (re)register guest memory if requested, install the
 * caller's signal mask, mark the vcpu running, sync the PSW from
 * kvm_run, then loop entering SIE and handling intercepts until a
 * signal arrives or an intercept needs userspace.  On exit the PSW is
 * written back to kvm_run and the exit reason is prepared for
 * userspace.  NOTE(review): several error/return lines are elided from
 * this excerpt.
 */
493 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
502 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
503 kvm_s390_vcpu_set_mem(vcpu)
505 /* verify, that memory has been registered */
506 if (!vcpu->arch.sie_block->gmslm) {
508 VCPU_EVENT(vcpu, 3, "%s", "no memory registered to run vcpu");
512 if (vcpu->sigset_active)
513 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
515 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
517 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
/* Only these exit reasons resume with the PSW taken from kvm_run. */
519 switch (kvm_run->exit_reason) {
520 case KVM_EXIT_S390_SIEIC:
521 case KVM_EXIT_UNKNOWN:
523 case KVM_EXIT_S390_RESET:
529 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
530 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
536 rc = kvm_handle_sie_intercept(vcpu);
537 } while (!signal_pending(current) && !rc);
/* RERUNVCPU: restart the run loop without exiting to userspace. */
539 if (rc == SIE_INTERCEPT_RERUNVCPU)
542 if (signal_pending(current) && !rc) {
543 kvm_run->exit_reason = KVM_EXIT_INTR;
547 if (rc == -ENOTSUPP) {
548 /* intercept cannot be handled in-kernel, prepare kvm-run */
549 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
550 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
551 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
552 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
556 if (rc == -EREMOTE) {
557 /* intercept was handled, but userspace support is needed
558 * kvm_run has been prepared by the handler */
562 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
563 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
565 if (vcpu->sigset_active)
566 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
570 vcpu->stat.exit_userspace++;
/*
 * Copy n bytes into guest storage, either through the prefix-relative
 * path (copy_to_guest) or the absolute path (copy_to_guest_absolute)
 * depending on the prefix flag.  NOTE(review): the branch selecting
 * between the two calls is elided from this excerpt.
 */
574 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
575 unsigned long n, int prefix)
578 return copy_to_guest(vcpu, guestdest, from, n);
580 return copy_to_guest_absolute(vcpu, guestdest, from, n);
/*
584 * store status at address
585 * we have two special cases:
586 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
587 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
589 int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
/* Architecture-mode byte written at absolute/prefixed address 163. */
591 const unsigned char archmode = 1;
594 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
595 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
597 addr = SAVE_AREA_BASE;
599 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
600 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
602 addr = SAVE_AREA_BASE;
/*
 * Store each register group at its architected offset within the save
 * area; any failed copy aborts (error returns elided from excerpt).
 */
607 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
608 vcpu->arch.guest_fpregs.fprs, 128, prefix))
611 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
612 vcpu->arch.guest_gprs, 128, prefix))
615 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
616 &vcpu->arch.sie_block->gpsw, 16, prefix))
619 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
620 &vcpu->arch.sie_block->prefix, 4, prefix))
623 if (__guestcopy(vcpu,
624 addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
625 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
628 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
629 &vcpu->arch.sie_block->todpr, 4, prefix))
632 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
633 &vcpu->arch.sie_block->cputm, 8, prefix))
636 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
637 &vcpu->arch.sie_block->ckc, 8, prefix))
640 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
641 &vcpu->arch.guest_acrs, 64, prefix))
644 if (__guestcopy(vcpu,
645 addr + offsetof(struct save_area_s390x, ctrl_regs),
646 &vcpu->arch.sie_block->gcr, 128, prefix))
/* Locking wrapper around __kvm_s390_vcpu_store_status() (lock lines elided). */
651 static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
656 rc = __kvm_s390_vcpu_store_status(vcpu, addr);
/*
 * Dispatch vcpu-level ioctls: interrupt injection, store-status,
 * initial PSW and initial reset.  Each case copies its argument from
 * userspace where needed and delegates to the matching backend.
 * NOTE(review): switch braces and default/error paths are elided.
 */
661 long kvm_arch_vcpu_ioctl(struct file *filp,
662 unsigned int ioctl, unsigned long arg)
664 struct kvm_vcpu *vcpu = filp->private_data;
665 void __user *argp = (void __user *)arg;
668 case KVM_S390_INTERRUPT: {
669 struct kvm_s390_interrupt s390int;
671 if (copy_from_user(&s390int, argp, sizeof(s390int)))
673 return kvm_s390_inject_vcpu(vcpu, &s390int);
675 case KVM_S390_STORE_STATUS:
676 return kvm_s390_vcpu_store_status(vcpu, arg);
677 case KVM_S390_SET_INITIAL_PSW: {
680 if (copy_from_user(&psw, argp, sizeof(psw)))
682 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
684 case KVM_S390_INITIAL_RESET:
685 return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
692 /* Section: memory related */
/*
 * Validate and install the single guest memory slot: it must start at
 * guest physical zero and be page-aligned in both address and size.
 * Afterwards every vcpu is asked (via KVM_REQ_MMU_RELOAD plus a sigp
 * stop action) to pick up the new SIE memory configuration.
 */
693 int kvm_arch_set_memory_region(struct kvm *kvm,
694 struct kvm_userspace_memory_region *mem,
695 struct kvm_memory_slot old,
699 struct kvm_vcpu *vcpu;
701 /* A few sanity checks. We can have exactly one memory slot which has
702 to start at guest virtual zero and which has to be located at a
703 page boundary in userland and which has to end at a page boundary.
704 The memory in userland is ok to be fragmented into various different
705 vmas. It is okay to mmap() and munmap() stuff in this slot after
706 doing this call at any time */
711 if (mem->guest_phys_addr)
714 if (mem->userspace_addr & (PAGE_SIZE - 1))
717 if (mem->memory_size & (PAGE_SIZE - 1))
723 /* request update of sie control block for all available vcpus */
724 kvm_for_each_vcpu(i, vcpu, kvm) {
725 if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
727 kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
/* Common-code hooks with no s390-specific work; bodies elided (trivial). */
733 void kvm_arch_flush_shadow(struct kvm *kvm)
737 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
/*
 * Module init: register with common KVM, then allocate the shared
 * facility-list page (DMA zone, as the SIE block holds a 31-bit
 * pointer to it), query the host facilities with stfle() and mask them
 * down to the subset KVM supports.
 */
742 static int __init kvm_s390_init(void)
745 ret = kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
/*
750 * guests can ask for up to 255+1 double words, we need a full page
751 * to hold the maximum amount of facilites. On the other hand, we
752 * only set facilities that are known to work in KVM.
 */
754 facilities = (unsigned long long *) get_zeroed_page(GFP_DMA);
759 stfle(facilities, 1);
760 facilities[0] &= 0xff00fff3f0700000ULL;
/* Module exit: release the facility page allocated in kvm_s390_init(). */
764 static void __exit kvm_s390_exit(void)
766 free_page((unsigned long) facilities);
770 module_init(kvm_s390_init);
771 module_exit(kvm_s390_exit);