2 * s390host.c -- hosting zSeries kernel virtual machines
4 * Copyright IBM Corp. 2008
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
15 #include <linux/compiler.h>
16 #include <linux/err.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/module.h>
22 #include <linux/slab.h>
23 #include <linux/timer.h>
24 #include <asm/lowcore.h>
25 #include <asm/pgtable.h>
/*
 * VCPU_STAT(x) expands to two comma-separated initializer values: the byte
 * offset of stat.x inside struct kvm_vcpu, followed by KVM_STAT_VCPU.
 */
30 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/*
 * Table of struct kvm_stats_debugfs_item entries. Each entry pairs a name
 * string with a per-vcpu counter selected through VCPU_STAT().
 * A few names intentionally differ from the field they point at, e.g.
 * "userspace_handled" -> exit_userspace and
 * "instruction_sigp_set_arch" -> instruction_sigp_arch.
 * NOTE(review): the terminating entry and closing brace of the array are
 * not part of this listing.
 */
32 struct kvm_stats_debugfs_item debugfs_entries[] = {
33 { "userspace_handled", VCPU_STAT(exit_userspace) },
34 { "exit_null", VCPU_STAT(exit_null) },
35 { "exit_validity", VCPU_STAT(exit_validity) },
36 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
37 { "exit_external_request", VCPU_STAT(exit_external_request) },
38 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
39 { "exit_instruction", VCPU_STAT(exit_instruction) },
40 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
41 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
42 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
43 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
44 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
45 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
46 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
47 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
48 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
49 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
50 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
51 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
52 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
53 { "instruction_spx", VCPU_STAT(instruction_spx) },
54 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
55 { "instruction_stap", VCPU_STAT(instruction_stap) },
56 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
57 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
58 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
59 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
60 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
61 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
62 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
63 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
64 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
65 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
66 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
67 { "diagnose_44", VCPU_STAT(diagnose_44) },
72 /* Section: not file related */
/*
 * Hardware-enable hook. The single parameter is named "garbage", and the
 * comment below states the rationale: there is nothing to switch on.
 * NOTE(review): the body lines are not part of this listing.
 */
73 void kvm_arch_hardware_enable(void *garbage)
75 /* every s390 is virtualization enabled ;-) */
78 void kvm_arch_hardware_disable(void *garbage)
82 int kvm_arch_hardware_setup(void)
87 void kvm_arch_hardware_unsetup(void)
91 void kvm_arch_check_processor_compat(void *rtn)
95 int kvm_arch_init(void *opaque)
100 void kvm_arch_exit(void)
104 /* Section: device related */
/*
 * Device-level ioctl entry point.
 * @filp:  file the request was issued on (not referenced in the visible lines)
 * @ioctl: request number
 * @arg:   request argument (not referenced in the visible lines)
 *
 * A KVM_S390_ENABLE_SIE request returns whatever s390_enable_sie() returns.
 * NOTE(review): the return value for any other request is not visible here.
 */
105 long kvm_arch_dev_ioctl(struct file *filp,
106 unsigned int ioctl, unsigned long arg)
108 if (ioctl == KVM_S390_ENABLE_SIE)
109 return s390_enable_sie();
/*
 * Extension query taking an extension id @ext.
 * KVM_CAP_USER_MEMORY is the only case label visible in this listing.
 * NOTE(review): the values returned per case are elided; confirm against
 * the full file.
 */
113 int kvm_dev_ioctl_check_extension(long ext)
116 case KVM_CAP_USER_MEMORY:
123 /* Section: vm related */
125 * Get (and clear) the dirty memory log for a memory slot.
127 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
128 struct kvm_dirty_log *log)
/*
 * VM-level ioctl entry point.
 * @filp:  file whose private_data holds the struct kvm for this VM
 * @ioctl: request number
 * @arg:   user-space pointer, reinterpreted as argp
 *
 * KVM_S390_INTERRUPT: copies a struct kvm_s390_interrupt from user space and
 * hands it to kvm_s390_inject_vm(); that call's result is stored in r.
 * NOTE(review): the copy_from_user() failure path and the handling of other
 * requests are not visible in this listing.
 */
133 long kvm_arch_vm_ioctl(struct file *filp,
134 unsigned int ioctl, unsigned long arg)
136 struct kvm *kvm = filp->private_data;
137 void __user *argp = (void __user *)arg;
141 case KVM_S390_INTERRUPT: {
142 struct kvm_s390_interrupt s390int;
145 if (copy_from_user(&s390int, argp, sizeof(s390int)))
147 r = kvm_s390_inject_vm(kvm, &s390int);
/*
 * Allocate and initialise a new struct kvm.
 *
 * Visible steps, in order: call s390_enable_sie(), allocate a zeroed
 * struct kvm, allocate one zeroed page for kvm->arch.sca, register a debug
 * area named "kvm-<pid of current>", initialise the float_int lock and list,
 * attach the sprintf debug view and log "vm created".
 * NOTE(review): the error checks between these steps and the return
 * statements are elided from this listing.
 */
157 struct kvm *kvm_arch_create_vm(void)
163 rc = s390_enable_sie();
168 kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
172 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
176 sprintf(debug_name, "kvm-%u", current->pid);
178 kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
182 spin_lock_init(&kvm->arch.float_int.lock);
183 INIT_LIST_HEAD(&kvm->arch.float_int.list);
185 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
186 VM_EVENT(kvm, 3, "%s", "vm created");
/* presumably part of the failure unwind (label not visible) — TODO confirm */
190 free_page((unsigned long)(kvm->arch.sca));
/*
 * Tear down one vcpu: log a "free cpu" event and release the page that
 * backs vcpu->arch.sie_block.
 * NOTE(review): whether the vcpu structure itself is released is not
 * visible in this listing.
 */
197 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
199 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
200 free_page((unsigned long)(vcpu->arch.sie_block));
/*
 * Walk all KVM_MAX_VCPUS slots of kvm->vcpus[], destroy the vcpu in a slot
 * via kvm_arch_vcpu_destroy() and clear the slot to NULL afterwards.
 * NOTE(review): original line 209 is elided; it presumably guards against
 * empty slots — confirm against the full file.
 */
204 static void kvm_free_vcpus(struct kvm *kvm)
208 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
210 kvm_arch_vcpu_destroy(kvm->vcpus[i]);
211 kvm->vcpus[i] = NULL;
/*
 * Release the resources of a VM: guest physical memory bookkeeping
 * (kvm_free_physmem), the page behind kvm->arch.sca, and the debug area
 * registered in kvm->arch.dbf.
 * NOTE(review): further statements of this function are elided here.
 */
216 void kvm_arch_destroy_vm(struct kvm *kvm)
219 kvm_free_physmem(kvm);
220 free_page((unsigned long)(kvm->arch.sca));
221 debug_unregister(kvm->arch.dbf);
225 /* Section: vcpu related */
226 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
/* Counterpart hook to vcpu init; kept only because common code names it. */
231 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
233 /* kvm common code refers to this, but doesn't call it */
/*
 * Switch register context from host to guest for @vcpu.
 * @cpu is not referenced in the visible lines.
 */
237 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
/* stash the host floating point and access registers in the vcpu */
239 save_fp_regs(&vcpu->arch.host_fpregs);
240 save_access_regs(vcpu->arch.host_acrs);
/* clear any fpc bits outside FPC_VALID_MASK before loading guest state */
241 vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
242 restore_fp_regs(&vcpu->arch.guest_fpregs);
243 restore_access_regs(vcpu->arch.guest_acrs);
/*
 * Switch register context from guest back to host for @vcpu: the mirror
 * image of kvm_arch_vcpu_load().
 */
246 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
/* capture the guest floating point and access registers */
248 save_fp_regs(&vcpu->arch.guest_fpregs);
249 save_access_regs(vcpu->arch.guest_acrs);
/* bring back the host values saved by kvm_arch_vcpu_load() */
250 restore_fp_regs(&vcpu->arch.host_fpregs);
251 restore_access_regs(vcpu->arch.host_acrs);
/*
 * Put the guest-visible state in @vcpu's SIE block and FP control into
 * its reset values: PSW, prefix, cputm, ckc and todpr are zeroed, ihcpu
 * becomes 0xffff, all 16 control registers are cleared and then gcr[0] and
 * gcr[14] receive fixed non-zero values, and the guest fpc is zeroed.
 */
254 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
256 /* this equals initial cpu reset in pop, but we don't switch to ESA */
257 vcpu->arch.sie_block->gpsw.mask = 0UL;
258 vcpu->arch.sie_block->gpsw.addr = 0UL;
259 vcpu->arch.sie_block->prefix = 0UL;
260 vcpu->arch.sie_block->ihcpu = 0xffff;
261 vcpu->arch.sie_block->cputm = 0UL;
262 vcpu->arch.sie_block->ckc = 0UL;
263 vcpu->arch.sie_block->todpr = 0;
264 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
265 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
266 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
267 vcpu->arch.guest_fpregs.fpc = 0;
/* executes lfpc with the just-zeroed guest fpc as its memory operand */
268 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
269 vcpu->arch.sie_block->gbea = 1;
272 /* The current code can have up to 256 pages for virtio */
273 #define VIRTIODESCSPACE (256ul * 4096ul)
/*
 * Fill in the static parts of @vcpu's SIE block and per-vcpu bookkeeping.
 * NOTE(review): the return statement is elided from this listing.
 */
275 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
277 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
/* gmslm: last byte of guest memory plus the VIRTIODESCSPACE tail */
278 vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize +
279 vcpu->kvm->arch.guest_origin +
280 VIRTIODESCSPACE - 1ul;
/* gmsor: start of guest memory, taken from the VM's guest_origin */
281 vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
282 vcpu->arch.sie_block->ecb = 2;
283 vcpu->arch.sie_block->eca = 0xC1002001U;
/* ckc_timer fires kvm_s390_idle_wakeup() with the vcpu pointer as data */
284 setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
285 (unsigned long) vcpu);
/* start from the id get_cpu_id() reports, then override the version byte */
286 get_cpu_id(&vcpu->arch.cpu_id);
287 vcpu->arch.cpu_id.version = 0xfe;
/*
 * Allocate a vcpu for @kvm and wire it into the VM's shared structures.
 * The remaining parameter line (declaring "id") and all error handling are
 * elided from this listing.
 */
291 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
294 struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
/* the SIE block occupies one zeroed page of its own */
300 vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
301 get_zeroed_page(GFP_KERNEL);
303 if (!vcpu->arch.sie_block)
306 vcpu->arch.sie_block->icpua = id;
/* the VM's SCA must exist and slot [id] must still be unused */
307 BUG_ON(!kvm->arch.sca);
308 BUG_ON(kvm->arch.sca->cpu[id].sda);
/* publish the SIE block address in the SCA slot for this id */
309 kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
/* store the SCA address in the SIE block as high and low 32-bit halves */
310 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
311 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
313 spin_lock_init(&vcpu->arch.local_int.lock);
314 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
315 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
/* registration in float_int.local_int[] happens under the float_int lock */
316 spin_lock_bh(&kvm->arch.float_int.lock);
317 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
318 init_waitqueue_head(&vcpu->arch.local_int.wq);
319 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
320 spin_unlock_bh(&kvm->arch.float_int.lock);
322 rc = kvm_vcpu_init(vcpu, kvm, id);
325 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
326 vcpu->arch.sie_block);
/*
 * Runnable query for @vcpu; present only to satisfy common code.
 * NOTE(review): the body is elided from this listing.
 */
335 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
337 /* kvm common code refers to this, but never calls it */
/*
 * ioctl-facing wrapper that applies kvm_s390_vcpu_initial_reset() to @vcpu.
 * NOTE(review): surrounding statements and the return value are elided.
 */
342 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
345 kvm_s390_vcpu_initial_reset(vcpu);
/*
 * Copy the general purpose registers supplied in @regs into the vcpu.
 * @vcpu: target vcpu; vcpu->arch.guest_gprs is overwritten
 * @regs: source register set; sizeof(regs->gprs) bytes are copied
 *
 * The source operand is the address of regs->gprs. It had been corrupted
 * into a registered-trademark sign by a character-entity decoding mishap.
 * NOTE(review): statements around the copy and the return value are elided
 * from this listing.
 */
350 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
353 memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
/*
 * Copy the vcpu's general purpose registers out into @regs.
 * @vcpu: source vcpu; vcpu->arch.guest_gprs is read
 * @regs: destination register set; sizeof(regs->gprs) bytes are written
 *
 * The destination operand is the address of regs->gprs. It had been
 * corrupted into a registered-trademark sign by a character-entity decoding
 * mishap.
 * NOTE(review): statements around the copy and the return value are elided
 * from this listing.
 */
358 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
361 memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
/*
 * Load special registers from @sregs into @vcpu: access registers go to
 * vcpu->arch.guest_acrs, control registers go to the SIE block's gcr array.
 * Copy sizes are taken from the @sregs members.
 * NOTE(review): the return value is elided from this listing.
 */
366 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
367 struct kvm_sregs *sregs)
370 memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
371 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/*
 * Export special registers from @vcpu into @sregs: access registers come
 * from vcpu->arch.guest_acrs, control registers from the SIE block's gcr.
 * Copy sizes are taken from the @sregs members.
 * NOTE(review): the return value is elided from this listing.
 */
376 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
377 struct kvm_sregs *sregs)
380 memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
381 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/*
 * Load floating point state from @fpu into @vcpu: the fprs array is copied
 * wholesale and the fpc value is assigned as-is (no masking at this point).
 * NOTE(review): the return value is elided from this listing.
 */
386 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
389 memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
390 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
/*
 * Export floating point state from @vcpu into @fpu: the fprs array and the
 * fpc value stored in vcpu->arch.guest_fpregs.
 * NOTE(review): the return value is elided from this listing.
 */
395 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
398 memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
399 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
/*
 * Store @psw as the guest PSW in @vcpu's SIE block.
 * The CPUSTAT_RUNNING bit of cpuflags is tested first.
 * NOTE(review): what happens when that bit is set, and the return values,
 * are elided from this listing.
 */
404 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
409 if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
412 vcpu->arch.sie_block->gpsw = psw;
/* Address translation request: unsupported, always answers -EINVAL. */
417 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
418 struct kvm_translation *tr)
420 return -EINVAL; /* not implemented yet */
/* Guest debug request: unsupported, always answers -EINVAL. */
423 int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
424 struct kvm_debug_guest *dbg)
426 return -EINVAL; /* not implemented yet */
/* Read of the multiprocessing state: unsupported, always answers -EINVAL. */
429 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
430 struct kvm_mp_state *mp_state)
432 return -EINVAL; /* not implemented yet */
/* Write of the multiprocessing state: unsupported, always answers -EINVAL. */
435 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
436 struct kvm_mp_state *mp_state)
438 return -EINVAL; /* not implemented yet */
441 extern void s390_handle_mcck(void);
/*
 * Run the guest once through sie64a() and bracket that call with the
 * register shuffling and event logging it needs.
 */
443 static void __vcpu_run(struct kvm_vcpu *vcpu)
/* 16 bytes starting at guest_gprs[14] are mirrored into the SIE block */
445 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
/* NOTE(review): the action taken when the flag is set is elided here */
450 if (test_thread_flag(TIF_MCCK_PENDING))
453 kvm_s390_deliver_pending_interrupts(vcpu);
/* clear the intercept code so a stale value is not seen after the run */
455 vcpu->arch.sie_block->icptcode = 0;
459 VCPU_EVENT(vcpu, 6, "entering sie flags %x",
460 atomic_read(&vcpu->arch.sie_block->cpuflags));
/* a non-zero result from sie64a() is reported to the guest as PGM_ADDRESSING */
461 if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
462 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
463 kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
465 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
466 vcpu->arch.sie_block->icptcode);
/* copy the same 16 bytes back from the SIE block into guest_gprs[14..] */
471 memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
/*
 * Run @vcpu until an intercept needs user space or a signal is pending.
 * @kvm_run: shared run structure; read for the previous exit_reason and
 *           filled in again when control goes back to user space
 * NOTE(review): several lines (loop head, break/goto statements, return)
 * are elided from this listing.
 */
474 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* install the vcpu's signal mask for the duration of the run */
481 if (vcpu->sigset_active)
482 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
484 atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
/* this vcpu's local_int slot must be registered in float_int by now */
486 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
/* resume according to how the previous run ended */
488 switch (kvm_run->exit_reason) {
489 case KVM_EXIT_S390_SIEIC:
/* pick up the guest PSW that user space left in kvm_run */
490 vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
491 vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
493 case KVM_EXIT_UNKNOWN:
494 case KVM_EXIT_S390_RESET:
/* keep handling intercepts while no signal is pending and rc stays 0 */
504 rc = kvm_handle_sie_intercept(vcpu);
505 } while (!signal_pending(current) && !rc);
507 if (signal_pending(current) && !rc)
510 if (rc == -ENOTSUPP) {
511 /* intercept cannot be handled in-kernel, prepare kvm-run */
512 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
513 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
514 kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
515 kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
516 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
517 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
521 if (rc == -EREMOTE) {
522 /* intercept was handled, but userspace support is needed
523 * kvm_run has been prepared by the handler */
/* put back the signal mask that was active on entry */
527 if (vcpu->sigset_active)
528 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
532 vcpu->stat.exit_userspace++;
/*
 * Copy @n bytes from kernel buffer @from to guest address @guestdest,
 * using either copy_to_guest() or copy_to_guest_absolute().
 * NOTE(review): the condition that selects between the two is elided; it
 * presumably tests @prefix — confirm against the full file.
 */
536 static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
537 unsigned long n, int prefix)
540 return copy_to_guest(vcpu, guestdest, from, n);
542 return copy_to_guest_absolute(vcpu, guestdest, from, n);
546 * store status at address
547 * we have two special cases:
548 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
549 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
/*
 * Write @vcpu's register state into guest memory as a struct
 * save_area_s390x located at @addr.
 * For both special @addr values a one-byte archmode flag (value 1) is first
 * written to guest address 163 and @addr is replaced by SAVE_AREA_BASE.
 * NOTE(review): the assignments to "prefix" and all failure returns are
 * elided from this listing.
 */
551 int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
553 const unsigned char archmode = 1;
556 if (addr == KVM_S390_STORE_STATUS_NOADDR) {
/* NOADDR: the flag byte is written through the absolute-address helper */
557 if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
559 addr = SAVE_AREA_BASE;
561 } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
/* PREFIXED: the flag byte is written through copy_to_guest() instead */
562 if (copy_to_guest(vcpu, 163ul, &archmode, 1))
564 addr = SAVE_AREA_BASE;
/* floating point registers, 128 bytes */
569 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
570 vcpu->arch.guest_fpregs.fprs, 128, prefix))
/* general purpose registers, 128 bytes */
573 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
574 vcpu->arch.guest_gprs, 128, prefix))
/* guest PSW, 16 bytes */
577 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
578 &vcpu->arch.sie_block->gpsw, 16, prefix))
/* prefix value from the SIE block, 4 bytes */
581 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
582 &vcpu->arch.sie_block->prefix, 4, prefix))
/* guest fpc, 4 bytes */
585 if (__guestcopy(vcpu,
586 addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
587 &vcpu->arch.guest_fpregs.fpc, 4, prefix))
/* todpr from the SIE block, 4 bytes */
590 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
591 &vcpu->arch.sie_block->todpr, 4, prefix))
/* cputm from the SIE block, 8 bytes */
594 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
595 &vcpu->arch.sie_block->cputm, 8, prefix))
/* ckc from the SIE block, 8 bytes */
598 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
599 &vcpu->arch.sie_block->ckc, 8, prefix))
/* access registers, 64 bytes */
602 if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
603 &vcpu->arch.guest_acrs, 64, prefix))
/* control registers, 128 bytes */
606 if (__guestcopy(vcpu,
607 addr + offsetof(struct save_area_s390x, ctrl_regs),
608 &vcpu->arch.sie_block->gcr, 128, prefix))
/*
 * Wrapper around __kvm_s390_vcpu_store_status(); its result is kept in rc.
 * NOTE(review): the statements surrounding the call are elided from this
 * listing.
 */
613 static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
618 rc = __kvm_s390_vcpu_store_status(vcpu, addr);
/*
 * vcpu-level ioctl dispatcher.
 * @filp:  file whose private_data holds the target struct kvm_vcpu
 * @ioctl: request number
 * @arg:   raw argument; used as a user pointer (argp) or as a plain value
 * NOTE(review): copy_from_user() failure paths and the default case are
 * elided from this listing.
 */
623 long kvm_arch_vcpu_ioctl(struct file *filp,
624 unsigned int ioctl, unsigned long arg)
626 struct kvm_vcpu *vcpu = filp->private_data;
627 void __user *argp = (void __user *)arg;
/* inject an interrupt described by a user-space struct into this vcpu */
630 case KVM_S390_INTERRUPT: {
631 struct kvm_s390_interrupt s390int;
633 if (copy_from_user(&s390int, argp, sizeof(s390int)))
635 return kvm_s390_inject_vcpu(vcpu, &s390int);
/* here arg is passed on by value as the store-status address */
637 case KVM_S390_STORE_STATUS:
638 return kvm_s390_vcpu_store_status(vcpu, arg);
639 case KVM_S390_SET_INITIAL_PSW: {
642 if (copy_from_user(&psw, argp, sizeof(psw)))
644 return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
646 case KVM_S390_INITIAL_RESET:
647 return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
654 /* Section: memory related */
/*
 * Record the single guest memory slot described by @mem in @kvm.
 * Three properties of @mem are tested before guest_origin and
 * guest_memsize are taken over from it.
 * NOTE(review): the last parameter line, the outcome of a failed test and
 * the return statements are elided from this listing.
 */
655 int kvm_arch_set_memory_region(struct kvm *kvm,
656 struct kvm_userspace_memory_region *mem,
657 struct kvm_memory_slot old,
660 /* A few sanity checks. We can have exactly one memory slot which has
661 to start at guest virtual zero and which has to be located at a
662 page boundary in userland and which has to end at a page boundary.
663 The memory in userland is ok to be fragmented into various different
664 vmas. It is okay to mmap() and munmap() stuff in this slot after
665 doing this call at any time */
/* test: slot does not start at guest address zero */
670 if (mem->guest_phys_addr)
/* test: user-space start address is not page aligned */
673 if (mem->userspace_addr & (PAGE_SIZE - 1))
/* test: slot size is not a multiple of the page size */
676 if (mem->memory_size & (PAGE_SIZE - 1))
679 kvm->arch.guest_origin = mem->userspace_addr;
680 kvm->arch.guest_memsize = mem->memory_size;
682 /* FIXME: we do want to interrupt running CPUs and update their memory
683 configuration now to avoid race conditions. But hey, changing the
684 memory layout while virtual CPUs are running is usually bad
685 programming practice. */
690 void kvm_arch_flush_shadow(struct kvm *kvm)
694 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
/*
 * Module init function: hands off to kvm_init() with a NULL opaque pointer,
 * the size of struct kvm_vcpu and this module, and returns its result.
 */
699 static int __init kvm_s390_init(void)
701 return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
704 static void __exit kvm_s390_exit(void)
/* Register kvm_s390_init / kvm_s390_exit as the module's entry and exit points. */
709 module_init(kvm_s390_init);
710 module_exit(kvm_s390_exit);