2 * s390host.c -- hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008,2009
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
16 #include <linux/compiler.h>
17 #include <linux/err.h>
19 #include <linux/hrtimer.h>
20 #include <linux/init.h>
21 #include <linux/kvm.h>
22 #include <linux/kvm_host.h>
23 #include <linux/module.h>
24 #include <linux/slab.h>
25 #include <linux/timer.h>
26 #include <asm/asm-offsets.h>
27 #include <asm/lowcore.h>
28 #include <asm/pgtable.h>
30 #include <asm/system.h>
/*
 * Expands to two comma-separated initializers: the byte offset of the
 * counter stat.<x> inside struct kvm_vcpu, followed by KVM_STAT_VCPU.
 * Used to fill the trailing fields of each debugfs_entries[] element.
 */
34 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/*
 * Table that pairs a statistic name string with the location (via
 * VCPU_STAT) of the matching counter in struct kvm_vcpu.
 *
 * Most names equal the counter's field name; the exceptions are
 * "userspace_handled" (exit_userspace), "exit_instr_and_program_int"
 * (exit_instr_and_program), "deliver_program_interruption"
 * (deliver_program_int), "instruction_sigp_set_arch" (instruction_sigp_arch)
 * and "instruction_sigp_set_prefix" (instruction_sigp_prefix).
 *
 * NOTE(review): the end of the initializer (terminating entry and closing
 * brace) is not visible in this listing.
 */
36 struct kvm_stats_debugfs_item debugfs_entries[] = {
37 { "userspace_handled", VCPU_STAT(exit_userspace) },
38 { "exit_null", VCPU_STAT(exit_null) },
39 { "exit_validity", VCPU_STAT(exit_validity) },
40 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
41 { "exit_external_request", VCPU_STAT(exit_external_request) },
42 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
43 { "exit_instruction", VCPU_STAT(exit_instruction) },
44 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
45 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
46 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
47 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
48 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
49 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
50 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
51 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
52 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
53 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
54 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
55 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
56 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
57 { "instruction_spx", VCPU_STAT(instruction_spx) },
58 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
59 { "instruction_stap", VCPU_STAT(instruction_stap) },
60 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
61 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
62 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
63 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
64 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
65 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
66 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
67 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
68 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
69 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
70 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
71 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
72 { "diagnose_44", VCPU_STAT(diagnose_44) },
/*
 * Facility list page shared by all vcpus: allocated and filled (from
 * S390_lowcore.stfle_fac_list, then masked) in kvm_s390_init(), freed in
 * kvm_s390_exit(). kvm_arch_vcpu_setup() stores its address in
 * sie_block->fac.
 */
76 static unsigned long long *facilities;
78 /* Section: not file related */
/*
 * kvm_arch_* hardware enable/disable/setup, processor-compat and
 * init/exit hooks.
 *
 * NOTE(review): only the signatures and one original remark are visible in
 * this listing; the function bodies are not shown, so their behavior is not
 * documented here and must be checked against the full file.
 */
79 int kvm_arch_hardware_enable(void *garbage)
81 /* every s390 is virtualization enabled ;-) */
85 void kvm_arch_hardware_disable(void *garbage)
89 int kvm_arch_hardware_setup(void)
94 void kvm_arch_hardware_unsetup(void)
98 void kvm_arch_check_processor_compat(void *rtn)
102 int kvm_arch_init(void *opaque)
107 void kvm_arch_exit(void)
111 /* Section: device related */
/*
 * Device-level ioctl handler.
 *
 * KVM_S390_ENABLE_SIE is forwarded to s390_enable_sie() and that call's
 * result is returned. NOTE(review): the return value for any other ioctl
 * number is not visible in this listing.
 */
112 long kvm_arch_dev_ioctl(struct file *filp,
113 unsigned int ioctl, unsigned long arg)
115 if (ioctl == KVM_S390_ENABLE_SIE)
116 return s390_enable_sie();
/*
 * Capability query for extension number `ext`.
 *
 * KVM_CAP_S390_PSW is one of the cases handled. NOTE(review): the switch
 * statement, any further cases and the returned values are not visible in
 * this listing.
 */
120 int kvm_dev_ioctl_check_extension(long ext)
125 case KVM_CAP_S390_PSW:
134 /* Section: vm related */
136 * Get (and clear) the dirty memory log for a memory slot.
/*
 * NOTE(review): only the signature is visible in this listing. Whether the
 * dirty log is actually produced here, and what is returned, must be
 * confirmed against the full file.
 */
138 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
139 struct kvm_dirty_log *log)
/*
 * VM-level ioctl handler; the struct kvm is taken from filp->private_data.
 *
 * KVM_S390_INTERRUPT: copies a struct kvm_s390_interrupt from the user
 * pointer in `arg` and injects it into the VM via kvm_s390_inject_vm(),
 * whose result is stored in r.
 *
 * NOTE(review): the switch head, the copy_from_user() failure handling, the
 * default case and the final return are not visible in this listing.
 */
144 long kvm_arch_vm_ioctl(struct file *filp,
145 unsigned int ioctl, unsigned long arg)
147 struct kvm *kvm = filp->private_data;
148 void __user *argp = (void __user *)arg;
152 case KVM_S390_INTERRUPT: {
153 struct kvm_s390_interrupt s390int;
156 if (copy_from_user(&s390int, argp, sizeof(s390int)))
158 r = kvm_s390_inject_vm(kvm, &s390int);
/*
 * Set up the s390-specific part of a new VM.
 *
 * Visible steps: enable SIE for the current process, allocate a zeroed page
 * for the SCA block, register a debug log named "kvm-<pid>", initialise the
 * floating interrupt lock and list, attach the sprintf view to the debug
 * log, and allocate a guest address space (gmap) for current->mm.
 *
 * NOTE(review): the checks between the steps, the labels and the return
 * statements are not visible in this listing.
 */
168 int kvm_arch_init_vm(struct kvm *kvm)
173 rc = s390_enable_sie();
/* One zeroed page holds the VM's SCA block. */
177 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
/*
 * NOTE(review): the declaration (and size) of debug_name is not visible;
 * confirm it is large enough for "kvm-" plus a decimal pid.
 */
181 sprintf(debug_name, "kvm-%u", current->pid);
183 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
187 spin_lock_init(&kvm->arch.float_int.lock);
188 INIT_LIST_HEAD(&kvm->arch.float_int.list);
190 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
191 VM_EVENT(kvm, 3, "%s", "vm created");
193 kvm->arch.gmap = gmap_alloc(current->mm);
/*
 * NOTE(review): the two calls below release the debug log and the SCA page
 * again; they look like the error-unwind path, but the labels that lead
 * here are not visible.
 */
199 debug_unregister(kvm->arch.dbf);
201 free_page((unsigned long)(kvm->arch.sca));
/*
 * Tear down one vcpu: remove it from the VM's SCA block, free its SIE
 * control block page and call kvm_vcpu_uninit().
 *
 * NOTE(review): lines following kvm_vcpu_uninit() (e.g. freeing the vcpu
 * structure itself) are not visible in this listing.
 */
206 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
208 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
/* Clear the bit that kvm_arch_vcpu_create() set for this vcpu id in mcn. */
209 clear_bit(63 - vcpu->vcpu_id, (unsigned long *) &vcpu->kvm->arch.sca->mcn);
/* Only reset the SCA entry if it still points at this vcpu's SIE block. */
210 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
211 (__u64) vcpu->arch.sie_block)
212 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
214 free_page((unsigned long)(vcpu->arch.sie_block));
215 kvm_vcpu_uninit(vcpu);
/*
 * Destroy every vcpu of the VM, then, under kvm->lock, reset all used
 * kvm->vcpus[] slots to NULL and set the online vcpu count to zero.
 */
219 static void kvm_free_vcpus(struct kvm *kvm)
222 struct kvm_vcpu *vcpu;
224 kvm_for_each_vcpu(i, vcpu, kvm)
225 kvm_arch_vcpu_destroy(vcpu);
227 mutex_lock(&kvm->lock);
228 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
229 kvm->vcpus[i] = NULL;
231 atomic_set(&kvm->online_vcpus, 0);
232 mutex_unlock(&kvm->lock);
/* NOTE(review): only the signature is visible in this listing. */
235 void kvm_arch_sync_events(struct kvm *kvm)
/*
 * Release the per-VM resources set up by kvm_arch_init_vm(): the SCA page,
 * the debug log and the guest address space (gmap).
 *
 * NOTE(review): at least one line before the free_page() call is not
 * visible in this listing.
 */
239 void kvm_arch_destroy_vm(struct kvm *kvm)
242 free_page((unsigned long)(kvm->arch.sca));
243 debug_unregister(kvm->arch.dbf);
244 gmap_free(kvm->arch.gmap);
247 /* Section: vcpu related */
/*
 * Per-vcpu init hook: the vcpu uses the VM-wide guest address space, so its
 * gmap pointer is copied from the VM. NOTE(review): return statement not
 * visible in this listing.
 */
248 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
250 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
/* NOTE(review): only the signature is visible in this listing. */
254 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
/*
 * Switch register context from host to guest: save the host floating point
 * and access registers, load the guest's copies, and enable the vcpu's
 * guest address space. Mirror image of kvm_arch_vcpu_put().
 */
259 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
261 save_fp_regs(&vcpu->arch.host_fpregs);
262 save_access_regs(vcpu->arch.host_acrs);
/* Drop any fpc bits outside FPC_VALID_MASK before loading the guest value. */
263 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
264 restore_fp_regs(&vcpu->arch.guest_fpregs);
265 restore_access_regs(vcpu->arch.guest_acrs);
266 gmap_enable(vcpu->arch.gmap);
/*
 * Switch register context from guest back to host: disable the guest
 * address space, save the guest floating point and access registers, and
 * restore the host copies saved by kvm_arch_vcpu_load().
 */
269 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
271 gmap_disable(vcpu->arch.gmap);
272 save_fp_regs(&vcpu->arch.guest_fpregs);
273 save_access_regs(vcpu->arch.guest_acrs);
274 restore_fp_regs(&vcpu->arch.host_fpregs);
275 restore_access_regs(vcpu->arch.host_acrs);
/*
 * Put the vcpu's architected state into its initial-reset values: PSW,
 * prefix, CPU timer, clock comparator and TOD programmable register are
 * zeroed, ihcpu is set to 0xffff, all 16 control registers are cleared
 * except for fixed values in CR0 and CR14, the floating point control
 * value is zeroed, and gbea is set to 1.
 */
278 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
280 /* this equals initial cpu reset in pop, but we don't switch to ESA */
281 vcpu->arch.sie_block->gpsw.mask = 0UL;
282 vcpu->arch.sie_block->gpsw.addr = 0UL;
283 vcpu->arch.sie_block->prefix = 0UL;
284 vcpu->arch.sie_block->ihcpu = 0xffff;
285 vcpu->arch.sie_block->cputm = 0UL;
286 vcpu->arch.sie_block->ckc = 0UL;
287 vcpu->arch.sie_block->todpr = 0;
288 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
/* NOTE(review): CR0/CR14 values presumably follow the architected reset state - confirm against the PoP. */
289 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
290 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
291 vcpu->arch.guest_fpregs.fpc = 0;
/* Load the just-zeroed value into the floating point control register. */
292 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
293 vcpu->arch.sie_block->gbea = 1;
/*
 * Configure a freshly created vcpu: fixed SIE control block settings
 * (cpuflags, ecb, eca, facility list address), the clock comparator hrtimer
 * and its tasklet, and the cpu id reported for this vcpu.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
296 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
298 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | CPUSTAT_SM);
299 vcpu->arch.sie_block->ecb = 6;
300 vcpu->arch.sie_block->eca = 0xC1002001U;
/*
 * The pointer is truncated to an int here. NOTE(review): the facilities
 * page is allocated with GFP_DMA in kvm_s390_init(), presumably so that its
 * address survives this truncation - confirm.
 */
301 vcpu->arch.sie_block->fac = (int) (long) facilities;
/* Absolute CLOCK_REALTIME timer; its callback is assigned a few lines below. */
302 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
303 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
304 (unsigned long) vcpu);
305 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
/* Start from the host cpu id, then override the version field with 0xff. */
306 get_cpu_id(&vcpu->arch.cpu_id);
307 vcpu->arch.cpu_id.version = 0xff;
/*
 * Allocate and wire up a new vcpu with number `id` for `kvm`.
 *
 * Visible steps: allocate the zeroed vcpu structure and a zeroed page for
 * its SIE control block, enter the block into the VM's SCA, initialise the
 * local interrupt state and register it with the VM's floating interrupt
 * structure, then run the generic kvm_vcpu_init().
 *
 * NOTE(review): the rest of the parameter list, the allocation-failure
 * handling, the labels and the return statements are not visible in this
 * listing.
 */
311 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
314 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
320 vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
321 get_zeroed_page(GFP_KERNEL);
323 if (!vcpu->arch.sie_block)
326 vcpu->arch.sie_block->icpua = id;
327 BUG_ON(!kvm->arch.sca);
/* Claim the SCA slot for this id only if it is still empty. */
328 if (!kvm->arch.sca->cpu[id].sda)
329 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
/* The SCA address is stored split into its upper and lower 32 bits. */
330 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
331 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
/* Bit (63 - id) of mcn marks this vcpu; cleared again in kvm_arch_vcpu_destroy(). */
332 set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
334 spin_lock_init(&vcpu->arch.local_int.lock);
335 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
336 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
/* Publish the local interrupt state in the VM-wide table under its lock. */
337 spin_lock(&kvm->arch.float_int.lock);
338 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
339 init_waitqueue_head(&vcpu->arch.local_int.wq);
340 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
341 spin_unlock(&kvm->arch.float_int.lock);
343 rc = kvm_vcpu_init(vcpu, kvm, id);
345 goto out_free_sie_block;
346 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
347 vcpu->arch.sie_block);
/* NOTE(review): presumably the out_free_sie_block error path; label not visible. */
351 free_page((unsigned long)(vcpu->arch.sie_block));
/*
 * Required by the common KVM code (see the original remark below).
 * NOTE(review): the body is not visible in this listing.
 */
358 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
360 /* kvm common code refers to this, but never calls it */
/*
 * ioctl wrapper around kvm_s390_vcpu_initial_reset(); reached from the
 * KVM_S390_INITIAL_RESET case of kvm_arch_vcpu_ioctl().
 * NOTE(review): the return statement is not visible in this listing.
 */
365 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
367 kvm_s390_vcpu_initial_reset(vcpu);
/*
 * Set the guest's general purpose registers: copies sizeof(regs->gprs)
 * bytes from regs->gprs into vcpu->arch.guest_gprs.
 *
 * NOTE(review): braces and the return statement are not visible in this
 * listing.
 */
371 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
373 memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
/*
 * Read the guest's general purpose registers: copies sizeof(regs->gprs)
 * bytes from vcpu->arch.guest_gprs into regs->gprs.
 *
 * NOTE(review): braces and the return statement are not visible in this
 * listing.
 */
377 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
379 memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
/*
 * Set the guest's special registers: access registers go to
 * vcpu->arch.guest_acrs, control registers go straight into the SIE
 * control block (gcr). Copy sizes are taken from the sregs fields.
 * NOTE(review): the return statement is not visible in this listing.
 */
383 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
384 struct kvm_sregs *sregs)
386 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
387 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/*
 * Read the guest's special registers: access registers from
 * vcpu->arch.guest_acrs and control registers from the SIE control block
 * (gcr) are copied into sregs.
 * NOTE(review): the return statement is not visible in this listing.
 */
391 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
392 struct kvm_sregs *sregs)
394 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
395 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/*
 * Set the guest's floating point state (registers and fpc) from fpu.
 * The fpc value is stored unmodified here; kvm_arch_vcpu_load() masks it
 * with FPC_VALID_MASK before it is loaded.
 * NOTE(review): the return statement is not visible in this listing.
 */
399 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
401 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
402 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
/*
 * Read the guest's floating point state (registers and fpc) into fpu.
 * NOTE(review): the return statement is not visible in this listing.
 */
406 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
408 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
409 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
/*
 * Store the PSW the guest should start with in vcpu->run (psw_mask /
 * psw_addr); kvm_arch_vcpu_ioctl_run() later copies these into the SIE
 * control block.
 *
 * The CPUSTAT_RUNNING test guards the assignment. NOTE(review): what
 * happens when the vcpu is running (presumably an error return) and the
 * final return are not visible in this listing.
 */
413 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
417 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
420 vcpu->run->psw_mask = psw.mask;
421 vcpu->run->psw_addr = psw.addr;
/* Address translation request: unsupported, fails with -EINVAL. */
426 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
427 struct kvm_translation *tr)
429 return -EINVAL; /* not implemented yet */
/* Guest debug configuration: unsupported, fails with -EINVAL. */
432 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
433 struct kvm_guest_debug *dbg)
435 return -EINVAL; /* not implemented yet */
/* Query of the vcpu's multiprocessing state: unsupported, fails with -EINVAL. */
438 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
439 struct kvm_mp_state *mp_state)
441 return -EINVAL; /* not implemented yet */
/* Update of the vcpu's multiprocessing state: unsupported, fails with -EINVAL. */
444 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
445 struct kvm_mp_state *mp_state)
447 return -EINVAL; /* not implemented yet */
/*
 * Enter the guest once via sie64a() and return after it exits.
 *
 * Before entry, 16 bytes starting at guest_gprs[14] are copied into the SIE
 * control block (gg14), pending interrupts are delivered and the intercept
 * code is cleared. After exit the same 16 bytes are copied back.
 *
 * NOTE(review): the action taken when TIF_MCCK_PENDING is set and several
 * other lines around the SIE entry are not visible in this listing.
 */
450 static void __vcpu_run(struct kvm_vcpu *vcpu)
452 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
457 if (test_thread_flag(TIF_MCCK_PENDING))
460 kvm_s390_deliver_pending_interrupts(vcpu);
462 vcpu->arch.sie_block->icptcode = 0;
466 VCPU_EVENT(vcpu, 6, "entering sie flags %x",
467 atomic_read(&vcpu->arch.sie_block->cpuflags));
/* A non-zero return from sie64a() is answered with an addressing exception for the guest. */
468 if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
469 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
470 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
472 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
473 vcpu->arch.sie_block->icptcode);
478 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
/*
 * Run the vcpu until an intercept needs userspace attention or a signal is
 * pending for the calling task.
 *
 * The guest PSW is taken from kvm_run on entry and written back to kvm_run
 * on exit. Intercepts are handled in-kernel by kvm_handle_sie_intercept()
 * for as long as it returns 0 and no signal is pending.
 *
 * NOTE(review): local declarations, the loop head, the bodies of several
 * branches, the labels and the final return are not visible in this
 * listing.
 */
481 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* Install the vcpu's own signal mask for the duration of the run, if one is set. */
487 if (vcpu->sigset_active)
488 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
490 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
/* The local interrupt state must have been registered by kvm_arch_vcpu_create(). */
492 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
/* Dispatch on the exit reason currently stored in kvm_run; per-case handling is not visible. */
494 switch (kvm_run->exit_reason) {
495 case KVM_EXIT_S390_SIEIC:
496 case KVM_EXIT_UNKNOWN:
498 case KVM_EXIT_S390_RESET:
/* Guest PSW for this run comes from kvm_run. */
504 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
505 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
511 rc = kvm_handle_sie_intercept(vcpu);
512 } while (!signal_pending(current) && !rc);
514 if (rc == SIE_INTERCEPT_RERUNVCPU)
/* Loop left only because of a signal: report an interrupted run. */
517 if (signal_pending(current) && !rc) {
518 kvm_run->exit_reason = KVM_EXIT_INTR;
522 if (rc == -EOPNOTSUPP) {
523 /* intercept cannot be handled in-kernel, prepare kvm-run */
524 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
525 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
526 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
527 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
531 if (rc == -EREMOTE) {
532 /* intercept was handled, but userspace support is needed
533 * kvm_run has been prepared by the handler */
/* Hand the current guest PSW back through kvm_run. */
537 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
538 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
/* Restore the signal mask that was saved on entry. */
540 if (vcpu->sigset_active)
541 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
543 vcpu->stat.exit_userspace++;
/*
 * Copy n bytes from `from` to guest address `guestdest`, using either
 * copy_to_guest() or copy_to_guest_absolute(), and return that call's
 * result.
 *
 * NOTE(review): the condition choosing between the two is not visible in
 * this listing; presumably a non-zero `prefix` selects copy_to_guest().
 */
547 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
548 unsigned long n, int prefix)
551 return copy_to_guest(vcpu, guestdest, from, n);
553 return copy_to_guest_absolute(vcpu, guestdest, from, n);
557 * store status at address
558 * we have two special cases:
559 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
560 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
/*
 * Write the vcpu's register state into a save area in guest memory.
 *
 * For KVM_S390_STORE_STATUS_NOADDR and KVM_S390_STORE_STATUS_PREFIXED the
 * byte `archmode` (1) is first stored at guest address 163 and the save
 * area is placed at SAVE_AREA_BASE; the two cases differ in using
 * copy_to_guest_absolute() versus copy_to_guest(). Each piece of state is
 * then copied to its struct save_area offset through __guestcopy().
 *
 * NOTE(review): the handling of a caller-supplied address, the setting of
 * `prefix`, and the error/success return statements are not visible in
 * this listing.
 */
562 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
564 unsigned char archmode = 1;
567 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
568 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
570 addr = SAVE_AREA_BASE;
572 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
573 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
575 addr = SAVE_AREA_BASE;
/* Floating point registers (128 bytes). */
580 if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
581 vcpu->arch.guest_fpregs.fprs, 128, prefix))
/* General purpose registers (128 bytes). */
584 if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
585 vcpu->arch.guest_gprs, 128, prefix))
/* Guest PSW (16 bytes). */
588 if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
589 &vcpu->arch.sie_block->gpsw, 16, prefix))
/* Prefix register (4 bytes). */
592 if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
593 &vcpu->arch.sie_block->prefix, 4, prefix))
/* Floating point control value (4 bytes). */
596 if (__guestcopy(vcpu,
597 addr + offsetof(struct save_area, fp_ctrl_reg),
598 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
/* todpr field (4 bytes). */
601 if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
602 &vcpu->arch.sie_block->todpr, 4, prefix))
/* CPU timer (8 bytes). */
605 if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
606 &vcpu->arch.sie_block->cputm, 8, prefix))
/* Clock comparator (8 bytes). */
609 if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
610 &vcpu->arch.sie_block->ckc, 8, prefix))
/* Access registers (64 bytes). */
613 if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
614 &vcpu->arch.guest_acrs, 64, prefix))
/* Control registers (128 bytes). */
617 if (__guestcopy(vcpu,
618 addr + offsetof(struct save_area, ctrl_regs),
619 &vcpu->arch.sie_block->gcr, 128, prefix))
/*
 * vcpu-level ioctl handler; the vcpu is taken from filp->private_data.
 *
 * Visible cases:
 *   KVM_S390_INTERRUPT       - copy a struct kvm_s390_interrupt from user
 *                              space and inject it into this vcpu.
 *   KVM_S390_STORE_STATUS    - store status at the address given in `arg`.
 *   KVM_S390_SET_INITIAL_PSW - copy a PSW from user space and record it as
 *                              the initial PSW.
 *   KVM_S390_INITIAL_RESET   - perform an initial cpu reset.
 *
 * NOTE(review): the switch head, copy_from_user() failure handling, the
 * declaration of `psw`, the default case and the final return are not
 * visible in this listing.
 */
624 long kvm_arch_vcpu_ioctl(struct file *filp,
625 unsigned int ioctl, unsigned long arg)
627 struct kvm_vcpu *vcpu = filp->private_data;
628 void __user *argp = (void __user *)arg;
632 case KVM_S390_INTERRUPT: {
633 struct kvm_s390_interrupt s390int;
636 if (copy_from_user(&s390int, argp, sizeof(s390int)))
638 r = kvm_s390_inject_vcpu(vcpu, &s390int);
641 case KVM_S390_STORE_STATUS:
642 r = kvm_s390_vcpu_store_status(vcpu, arg);
644 case KVM_S390_SET_INITIAL_PSW: {
648 if (copy_from_user(&psw, argp, sizeof(psw)))
650 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
653 case KVM_S390_INITIAL_RESET:
654 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
662 /* Section: memory related */
/*
 * Validate a userspace memory region before it is committed.
 *
 * Visible checks: the guest physical start address must be zero, and both
 * the userspace address and the size must have their low 20 bits clear.
 *
 * NOTE(review): the last parameter line, the results returned when a check
 * fails and the final return are not visible in this listing.
 */
663 int kvm_arch_prepare_memory_region(struct kvm *kvm,
664 struct kvm_memory_slot *memslot,
665 struct kvm_memory_slot old,
666 struct kvm_userspace_memory_region *mem,
669 /* A few sanity checks. We can have exactly one memory slot which has
670 to start at guest virtual zero and which has to be located at a
671 page boundary in userland and which has to end at a page boundary.
672 The memory in userland is ok to be fragmented into various different
673 vmas. It is okay to mmap() and munmap() stuff in this slot after
674 doing this call at any time */
/* The field tested for "start at zero" is the guest physical address. */
679 if (mem->guest_phys_addr)
/*
 * NOTE(review): the remark above says "page boundary", but the 0xfffff mask
 * used in the two checks below demands 1 MiB (2^20 byte) alignment of the
 * userspace address and of the size.
 */
682 if (mem->userspace_addr & 0xffffful)
685 if (mem->memory_size & 0xffffful)
/*
 * Map the validated userspace region into the VM's guest address space
 * with gmap_map_segment(). The function returns void, so a mapping failure
 * can only be logged, not reported to the caller.
 *
 * NOTE(review): the last parameter line and the condition guarding the
 * printk() are not visible in this listing.
 */
694 void kvm_arch_commit_memory_region(struct kvm *kvm,
695 struct kvm_userspace_memory_region *mem,
696 struct kvm_memory_slot old,
702 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
703 mem->guest_phys_addr, mem->memory_size);
705 printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
/* NOTE(review): only the signature is visible in this listing. */
709 void kvm_arch_flush_shadow(struct kvm *kvm)
/*
 * Module init: register with the generic KVM code via kvm_init(), then
 * allocate one zeroed page for the facility list, copy the first 16 bytes
 * of the host's facility list (S390_lowcore.stfle_fac_list) into it and
 * AND both 64-bit words with fixed masks.
 *
 * NOTE(review): the error handling after kvm_init() and after the page
 * allocation, and the return statements, are not visible in this listing.
 * GFP_DMA presumably keeps the page address small enough for the (int)
 * truncation in kvm_arch_vcpu_setup() - confirm.
 */
713 static int __init kvm_s390_init(void)
716 ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
721 * guests can ask for up to 255+1 double words, we need a full page
722 * to hold the maximum amount of facilities. On the other hand, we
723 * only set facilities that are known to work in KVM.
725 facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
730 memcpy(facilities, S390_lowcore.stfle_fac_list, 16);
731 facilities[0] &= 0xff00fff3f47c0000ULL;
732 facilities[1] &= 0x201c000000000000ULL;
/*
 * Module exit: release the facility list page allocated in
 * kvm_s390_init(). NOTE(review): any further teardown after free_page()
 * is not visible in this listing.
 */
736 static void __exit kvm_s390_exit(void)
738 free_page((unsigned long) facilities);
/* Register kvm_s390_init()/kvm_s390_exit() as the module's entry and exit points. */
742 module_init(kvm_s390_init);
743 module_exit(kvm_s390_exit);