/*
 * s390host.c -- hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008,2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/system.h>
#include "kvm-s390.h"
#include "gaccess.h"
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { NULL }
};
static unsigned long long *facilities;
/* Section: not file related */
int kvm_arch_hardware_enable(void *garbage)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

void kvm_arch_hardware_disable(void *garbage)
{
}

int kvm_arch_hardware_setup(void)
{
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
}

void kvm_arch_check_processor_compat(void *rtn)
{
}

int kvm_arch_init(void *opaque)
{
        return 0;
}

void kvm_arch_exit(void)
{
}
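
/*
 * The only device ioctl, KVM_S390_ENABLE_SIE, hands off to
 * s390_enable_sie(), which reworks the calling process's address space
 * (page tables with per-page guest state) for use with the SIE
 * instruction.
 */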
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

int kvm_dev_ioctl_check_extension(long ext)
{
        /* no extensions beyond the architecture-independent set */
        return 0;
}
/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        return 0;
}
long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
{
        struct kvm *kvm = filp->private_data;
        void __user *argp = (void __user *)arg;
        int r;

        switch (ioctl) {
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;

                r = -EFAULT;
                if (copy_from_user(&s390int, argp, sizeof(s390int)))
                        break;
                r = kvm_s390_inject_vm(kvm, &s390int);
                break;
        }
        default:
                r = -ENOTTY;
        }

        return r;
}
struct kvm *kvm_arch_create_vm(void)
{
        struct kvm *kvm;
        int rc;
        char debug_name[16];

        rc = s390_enable_sie();
        if (rc)
                goto out_err;

        rc = -ENOMEM;
        kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
        if (!kvm)
                goto out_err;

        kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
        if (!kvm->arch.sca)
                goto out_nosca;

        sprintf(debug_name, "kvm-%u", current->pid);

        kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
        if (!kvm->arch.dbf)
                goto out_nodbf;

        spin_lock_init(&kvm->arch.float_int.lock);
        INIT_LIST_HEAD(&kvm->arch.float_int.list);

        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
        VM_EVENT(kvm, 3, "%s", "vm created");

        return kvm;
out_nodbf:
        free_page((unsigned long)(kvm->arch.sca));
out_nosca:
        kfree(kvm);
out_err:
        return ERR_PTR(rc);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
        if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
                (__u64) vcpu->arch.sie_block)
                vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
        smp_mb();
        free_page((unsigned long)(vcpu->arch.sie_block));
        kvm_vcpu_uninit(vcpu);
        kfree(vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_arch_vcpu_destroy(vcpu);

        mutex_lock(&kvm->lock);
        for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
                kvm->vcpus[i] = NULL;

        atomic_set(&kvm->online_vcpus, 0);
        mutex_unlock(&kvm->lock);
}
void kvm_arch_sync_events(struct kvm *kvm)
{
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
        kvm_free_vcpus(kvm);
        kvm_free_physmem(kvm);
        free_page((unsigned long)(kvm->arch.sca));
        debug_unregister(kvm->arch.dbf);
        kfree(kvm);
}
/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
        return 0;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
        /* nothing to do */
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        save_fp_regs(&vcpu->arch.host_fpregs);
        save_access_regs(vcpu->arch.host_acrs);
        vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
        restore_fp_regs(&vcpu->arch.guest_fpregs);
        restore_access_regs(vcpu->arch.guest_acrs);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
        save_fp_regs(&vcpu->arch.guest_fpregs);
        save_access_regs(vcpu->arch.guest_acrs);
        restore_fp_regs(&vcpu->arch.host_fpregs);
        restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
        /* this equals initial cpu reset in pop, but we don't switch to ESA */
        vcpu->arch.sie_block->gpsw.mask = 0UL;
        vcpu->arch.sie_block->gpsw.addr = 0UL;
        vcpu->arch.sie_block->prefix = 0UL;
        vcpu->arch.sie_block->ihcpu = 0xffff;
        vcpu->arch.sie_block->cputm = 0UL;
        vcpu->arch.sie_block->ckc = 0UL;
        vcpu->arch.sie_block->todpr = 0;
        memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
        vcpu->arch.sie_block->gcr[0] = 0xE0UL;
        vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
        vcpu->arch.guest_fpregs.fpc = 0;
        asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
        vcpu->arch.sie_block->gbea = 1;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
        set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests);
        vcpu->arch.sie_block->ecb = 2;
        vcpu->arch.sie_block->eca = 0xC1002001U;
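        /* point the guest facility list at the page set up in kvm_s390_init() */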
        vcpu->arch.sie_block->fac = (int) (long) facilities;
        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
        tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
                     (unsigned long) vcpu);
        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
        get_cpu_id(&vcpu->arch.cpu_id);
        vcpu->arch.cpu_id.version = 0xff;
        return 0;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
                                      unsigned int id)
{
        struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
        int rc = -ENOMEM;

        if (!vcpu)
                goto out_nomem;

        vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
                                        get_zeroed_page(GFP_KERNEL);

        if (!vcpu->arch.sie_block)
                goto out_free_cpu;

        vcpu->arch.sie_block->icpua = id;
        BUG_ON(!kvm->arch.sca);
        if (!kvm->arch.sca->cpu[id].sda)
                kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
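        /* the SIE block keeps the 64-bit SCA origin as two 32-bit halves */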
        vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
        vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;

        spin_lock_init(&vcpu->arch.local_int.lock);
        INIT_LIST_HEAD(&vcpu->arch.local_int.list);
        vcpu->arch.local_int.float_int = &kvm->arch.float_int;
        spin_lock(&kvm->arch.float_int.lock);
        kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
        init_waitqueue_head(&vcpu->arch.local_int.wq);
        vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
        spin_unlock(&kvm->arch.float_int.lock);

        rc = kvm_vcpu_init(vcpu, kvm, id);
        if (rc)
                goto out_free_sie_block;
        VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
                 vcpu->arch.sie_block);

        return vcpu;
out_free_sie_block:
        free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
        kfree(vcpu);
out_nomem:
        return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
        /* kvm common code refers to this, but never calls it */
        BUG();
        return 0;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
        vcpu_load(vcpu);
        kvm_s390_vcpu_initial_reset(vcpu);
        vcpu_put(vcpu);
        return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        vcpu_load(vcpu);
        memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
        vcpu_put(vcpu);
        return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        vcpu_load(vcpu);
        memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
        vcpu_put(vcpu);
        return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        vcpu_load(vcpu);
        memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
        memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
        vcpu_put(vcpu);
        return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        vcpu_load(vcpu);
        memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
        memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
        vcpu_put(vcpu);
        return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        vcpu_load(vcpu);
        memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
        vcpu->arch.guest_fpregs.fpc = fpu->fpc;
        vcpu_put(vcpu);
        return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        vcpu_load(vcpu);
        memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
        fpu->fpc = vcpu->arch.guest_fpregs.fpc;
        vcpu_put(vcpu);
        return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
        int rc = 0;

        vcpu_load(vcpu);
        if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
                rc = -EBUSY;
        else
                vcpu->arch.sie_block->gpsw = psw;
        vcpu_put(vcpu);
        return rc;
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                  struct kvm_translation *tr)
{
        return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
                                        struct kvm_guest_debug *dbg)
{
        return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
{
        return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
{
        return -EINVAL; /* not implemented yet */
}
static void __vcpu_run(struct kvm_vcpu *vcpu)
{
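        /*
         * guest gprs 14 and 15 live in the SIE block (gg14/gg15) while the
         * vcpu runs; copy them in here and back out after SIE exit below.
         */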
        memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);

        if (need_resched())
                schedule();

        if (test_thread_flag(TIF_MCCK_PENDING))
                s390_handle_mcck();

        kvm_s390_deliver_pending_interrupts(vcpu);

        vcpu->arch.sie_block->icptcode = 0;
        local_irq_disable();
        kvm_guest_enter();
        local_irq_enable();
        VCPU_EVENT(vcpu, 6, "entering sie flags %x",
                   atomic_read(&vcpu->arch.sie_block->cpuflags));
        if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
                VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
                kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
        }
        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
                   vcpu->arch.sie_block->icptcode);
        local_irq_disable();
        kvm_guest_exit();
        local_irq_enable();

        memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
        int rc;
        sigset_t sigsaved;

        vcpu_load(vcpu);

rerun_vcpu:
        if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
                kvm_s390_vcpu_set_mem(vcpu);

        /* verify that memory has been registered */
        if (!vcpu->arch.sie_block->gmslm) {
                vcpu_put(vcpu);
                VCPU_EVENT(vcpu, 3, "%s", "no memory registered to run vcpu");
                return -EINVAL;
        }

        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

        atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);

        BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
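
        /*
         * Pick up state userspace may have modified in kvm_run since the
         * last exit before reentering SIE.
         */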
        switch (kvm_run->exit_reason) {
        case KVM_EXIT_S390_SIEIC:
                vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
                vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
                break;
        case KVM_EXIT_UNKNOWN:
        case KVM_EXIT_INTR:
        case KVM_EXIT_S390_RESET:
                break;
        default:
                BUG();
        }

        might_fault();

        do {
                __vcpu_run(vcpu);
                rc = kvm_handle_sie_intercept(vcpu);
        } while (!signal_pending(current) && !rc);
        if (rc == SIE_INTERCEPT_RERUNVCPU)
                goto rerun_vcpu;

        if (signal_pending(current) && !rc) {
                kvm_run->exit_reason = KVM_EXIT_INTR;
                rc = -EINTR;
        }

        if (rc == -ENOTSUPP) {
                /* intercept cannot be handled in-kernel, prepare kvm-run */
                kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
                kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
                kvm_run->s390_sieic.mask     = vcpu->arch.sie_block->gpsw.mask;
                kvm_run->s390_sieic.addr     = vcpu->arch.sie_block->gpsw.addr;
                kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
                kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
                rc = 0;
        }

        if (rc == -EREMOTE) {
                /* intercept was handled, but userspace support is needed;
                 * kvm_run has been prepared by the handler */
                rc = 0;
        }

        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &sigsaved, NULL);

        vcpu_put(vcpu);

        vcpu->stat.exit_userspace++;
        return rc;
}
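
/*
 * prefix != 0: guestdest is a prefixed (per-cpu) address, so use
 * copy_to_guest(); otherwise treat it as a guest absolute address.
 */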
static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
                       unsigned long n, int prefix)
{
        if (prefix)
                return copy_to_guest(vcpu, guestdest, from, n);
        else
                return copy_to_guest_absolute(vcpu, guestdest, from, n);
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
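        /*
         * Absolute (or prefixed) address 163 holds the architectural-mode
         * id; storing 1 records that z/Architecture mode was in effect.
         */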
        const unsigned char archmode = 1;
        int prefix;

        if (addr == KVM_S390_STORE_STATUS_NOADDR) {
                if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
                        return -EFAULT;
                addr = SAVE_AREA_BASE;
                prefix = 0;
        } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
                if (copy_to_guest(vcpu, 163ul, &archmode, 1))
                        return -EFAULT;
                addr = SAVE_AREA_BASE;
                prefix = 1;
        } else
                prefix = 0;
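
        /*
         * Store each register group at its architected offset within the
         * save area, honouring the prefix setting chosen above.
         */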
        if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
                        vcpu->arch.guest_fpregs.fprs, 128, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
                        vcpu->arch.guest_gprs, 128, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
                        &vcpu->arch.sie_block->gpsw, 16, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
                        &vcpu->arch.sie_block->prefix, 4, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu,
                        addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
                        &vcpu->arch.guest_fpregs.fpc, 4, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
                        &vcpu->arch.sie_block->todpr, 4, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
                        &vcpu->arch.sie_block->cputm, 8, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
                        &vcpu->arch.sie_block->ckc, 8, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
                        &vcpu->arch.guest_acrs, 64, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu,
                        addr + offsetof(struct save_area_s390x, ctrl_regs),
                        &vcpu->arch.sie_block->gcr, 128, prefix))
                return -EFAULT;
        return 0;
}
static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
        int rc;

        vcpu_load(vcpu);
        rc = __kvm_s390_vcpu_store_status(vcpu, addr);
        vcpu_put(vcpu);
        return rc;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
{
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;

        switch (ioctl) {
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;

                if (copy_from_user(&s390int, argp, sizeof(s390int)))
                        return -EFAULT;
                return kvm_s390_inject_vcpu(vcpu, &s390int);
        }
        case KVM_S390_STORE_STATUS:
                return kvm_s390_vcpu_store_status(vcpu, arg);
        case KVM_S390_SET_INITIAL_PSW: {
                psw_t psw;

                if (copy_from_user(&psw, argp, sizeof(psw)))
                        return -EFAULT;
                return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
        }
        case KVM_S390_INITIAL_RESET:
                return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
        default:
                ;
        }
        return -EINVAL;
}
/* Section: memory related */
int kvm_arch_set_memory_region(struct kvm *kvm,
                               struct kvm_userspace_memory_region *mem,
                               struct kvm_memory_slot old,
                               int user_alloc)
{
        int i;
        struct kvm_vcpu *vcpu;

        /* A few sanity checks. We can have exactly one memory slot, which has
           to start at guest physical zero and which has to be located at a
           page boundary in userland and which has to end at a page boundary.
           The memory in userland is ok to be fragmented into various different
           vmas. It is okay to mmap() and munmap() stuff in this slot after
           doing this call at any time */
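
        /*
         * Hypothetical userspace setup satisfying these checks: one slot at
         * guest physical 0, page-aligned at both ends, registered through
         * the generic KVM_SET_USER_MEMORY_REGION ioctl on the VM fd:
         *
         *   struct kvm_userspace_memory_region mem = {
         *           .slot            = 0,
         *           .flags           = 0,
         *           .guest_phys_addr = 0,
         *           .memory_size     = size,   a multiple of PAGE_SIZE
         *           .userspace_addr  = (__u64) mmap(NULL, size,
         *                                   PROT_READ | PROT_WRITE,
         *                                   MAP_PRIVATE | MAP_ANONYMOUS,
         *                                   -1, 0),
         *   };
         *   ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
         */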
        if (mem->slot)
                return -EINVAL;

        if (mem->guest_phys_addr)
                return -EINVAL;

        if (mem->userspace_addr & (PAGE_SIZE - 1))
                return -EINVAL;

        if (mem->memory_size & (PAGE_SIZE - 1))
                return -EINVAL;

        if (!user_alloc)
                return -EINVAL;

        /* request update of sie control block for all available vcpus */
        kvm_for_each_vcpu(i, vcpu, kvm) {
                if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
                        continue;
                kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
        }

        return 0;
}
void kvm_arch_flush_shadow(struct kvm *kvm)
{
}

gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
{
        return gfn;
}
static int __init kvm_s390_init(void)
{
        int ret;

        ret = kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
        if (ret)
                return ret;

        /*
         * guests can ask for up to 255+1 double words, so we need a full page
         * to hold the maximum number of facilities. On the other hand, we
         * only set facilities that are known to work in KVM.
         */
        facilities = (unsigned long long *) get_zeroed_page(GFP_DMA);
        if (!facilities) {
                kvm_exit();
                return -ENOMEM;
        }
        stfle(facilities, 1);
        facilities[0] &= 0xff00fff3f0700000ULL;
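        /*
         * stfle() stored the first doubleword of the host facility list
         * above; the mask keeps only facilities KVM is known to handle.
         */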
        return 0;
}

static void __exit kvm_s390_exit(void)
{
        free_page((unsigned long) facilities);
        kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);