Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/hch/hfsplus
[pandora-kernel.git] / arch / powerpc / kvm / book3s.c
index a3cef30..e316847 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/kvm_host.h>
 #include <linux/err.h>
 #include <linux/slab.h>
+#include "trace.h"
 
 #include <asm/reg.h>
 #include <asm/cputable.h>
@@ -35,7 +36,6 @@
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
 /* #define EXIT_DEBUG */
-/* #define EXIT_DEBUG_SIMPLE */
 /* #define DEBUG_EXT */
 
 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
@@ -105,65 +105,71 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
        kvmppc_giveup_ext(vcpu, MSR_VSX);
 }
 
-#if defined(EXIT_DEBUG)
-static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
-{
-       u64 jd = mftb() - vcpu->arch.dec_jiffies;
-       return vcpu->arch.dec - jd;
-}
-#endif
-
 static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
 {
-       vcpu->arch.shadow_msr = vcpu->arch.msr;
+       ulong smsr = vcpu->arch.shared->msr; /* build the new shadow MSR in a local, starting from shared->msr */
+
        /* Guest MSR values */
-       vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE |
-                                MSR_BE | MSR_DE;
+       smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE; /* keep only these guest bits */
        /* Process MSR values */
-       vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR |
-                                MSR_EE;
+       smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; /* these bits are always forced on */
        /* External providers the guest reserved */
-       vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext);
+       smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext); /* re-add guest MSR bits selected by guest_owned_ext */
        /* 64-bit Process MSR values */
 #ifdef CONFIG_PPC_BOOK3S_64
-       vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV;
+       smsr |= MSR_ISF | MSR_HV; /* only added when built for Book3S 64-bit */
 #endif
+       vcpu->arch.shadow_msr = smsr; /* write the computed value back to the vcpu */
 }
 
 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
 {
-       ulong old_msr = vcpu->arch.msr;
+       ulong old_msr = vcpu->arch.shared->msr; /* MSR before the update; compared below to detect PR/IR/DR changes */
 
 #ifdef EXIT_DEBUG
        printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
 #endif
 
        msr &= to_book3s(vcpu)->msr_mask;
-       vcpu->arch.msr = msr;
+       vcpu->arch.shared->msr = msr; /* store the masked value, then refresh the shadow MSR */
        kvmppc_recalc_shadow_msr(vcpu);
 
-       if (msr & (MSR_WE|MSR_POW)) {
+       if (msr & MSR_POW) { /* POW set: block the vcpu unless exceptions are already pending */
                if (!vcpu->arch.pending_exceptions) {
                        kvm_vcpu_block(vcpu);
                        vcpu->stat.halt_wakeup++;
+
+                       /* Unset POW bit after we woke up, in the local copy and in shared->msr */
+                       msr &= ~MSR_POW;
+                       vcpu->arch.shared->msr = msr;
                }
        }
 
-       if ((vcpu->arch.msr & (MSR_PR|MSR_IR|MSR_DR)) !=
+       if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) != /* any of PR/IR/DR changed: flush and remap segments */
                    (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
                kvmppc_mmu_flush_segments(vcpu);
                kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+
+               /* Preload magic page segment when in kernel mode (MSR_PR clear) and magic_page_pa is non-zero */
+               if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
+                       struct kvm_vcpu_arch *a = &vcpu->arch;
+
+                       if (msr & MSR_DR) /* MSR_DR set: map the segment of magic_page_ea */
+                               kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
+                       else /* MSR_DR clear: map the segment of magic_page_pa */
+                               kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
+               }
        }
 
        /* Preload FPU if it's enabled */
-       if (vcpu->arch.msr & MSR_FP)
+       if (vcpu->arch.shared->msr & MSR_FP) /* tested on the MSR as finally stored above */
                kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 }
 
 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
 {
-       vcpu->arch.srr0 = kvmppc_get_pc(vcpu);
-       vcpu->arch.srr1 = vcpu->arch.msr | flags;
+       vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu); /* srr0 = guest PC at the time of injection */
+       vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags; /* srr1 = current MSR OR'ed with the caller's flags */
        kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec);
        vcpu->arch.mmu.reset_msr(vcpu);
 }
@@ -180,6 +186,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
        case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE;         break;
        case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT;         break;
        case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL;             break;
+       case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL;       break;
        case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT;            break;
        case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM;              break;
        case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL;           break;
@@ -199,6 +206,9 @@ static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
 {
        clear_bit(kvmppc_book3s_vec2irqprio(vec),
                  &vcpu->arch.pending_exceptions);
+
+       if (!vcpu->arch.pending_exceptions)
+               vcpu->arch.shared->int_pending = 0;
 }
 
 void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
@@ -237,13 +247,19 @@ void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                 struct kvm_interrupt *irq)
 {
-       kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+       unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL; /* default vector when no level request is made */
+
+       if (irq->irq == KVM_INTERRUPT_SET_LEVEL) /* level request: queue the separate EXTERNAL_LEVEL vector */
+               vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL;
+
+       kvmppc_book3s_queue_irqprio(vcpu, vec); /* queue whichever vector was selected */
 }
 
 void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
                                   struct kvm_interrupt *irq)
 {
        kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+       kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); /* also dequeue the level variant; irq is not inspected */
 }
 
 int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
@@ -251,14 +267,29 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
        int deliver = 1;
        int vec = 0;
        ulong flags = 0ULL;
+       ulong crit_raw = vcpu->arch.shared->critical;
+       ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
+       bool crit;
+
+       /* Truncate crit indicators in 32 bit mode */
+       if (!(vcpu->arch.shared->msr & MSR_SF)) {
+               crit_raw &= 0xffffffff;
+               crit_r1 &= 0xffffffff;
+       }
+
+       /* Critical section when crit == r1 */
+       crit = (crit_raw == crit_r1);
+       /* ... and we're in supervisor mode */
+       crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
 
        switch (priority) {
        case BOOK3S_IRQPRIO_DECREMENTER:
-               deliver = vcpu->arch.msr & MSR_EE;
+               deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit;
                vec = BOOK3S_INTERRUPT_DECREMENTER;
                break;
        case BOOK3S_IRQPRIO_EXTERNAL:
-               deliver = vcpu->arch.msr & MSR_EE;
+       case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
+               deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit;
                vec = BOOK3S_INTERRUPT_EXTERNAL;
                break;
        case BOOK3S_IRQPRIO_SYSTEM_RESET:
@@ -320,9 +351,27 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
        return deliver;
 }
 
+/*
+ * Decide whether an irqprio is cleared once issued: false for DECREMENTER and EXTERNAL_LEVEL, true for all others.
+ */
+static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
+{
+       switch (priority) { /* vcpu is not consulted; only the priority value matters */
+               case BOOK3S_IRQPRIO_DECREMENTER:
+                       /* DEC interrupts get cleared by mtdec */
+                       return false;
+               case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
+                       /* External interrupts get cleared by userspace */
+                       return false;
+       }
+
+       return true; /* every other priority is cleared after delivery */
+}
+
 void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
 {
        unsigned long *pending = &vcpu->arch.pending_exceptions;
+       unsigned long old_pending = vcpu->arch.pending_exceptions;
        unsigned int priority;
 
 #ifdef EXIT_DEBUG
@@ -332,8 +381,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
        priority = __ffs(*pending);
        while (priority < BOOK3S_IRQPRIO_MAX) {
                if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
-                   (priority != BOOK3S_IRQPRIO_DECREMENTER)) {
-                       /* DEC interrupts get cleared by mtdec */
+                   clear_irqprio(vcpu, priority)) {
                        clear_bit(priority, &vcpu->arch.pending_exceptions);
                        break;
                }
@@ -342,6 +390,12 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
                                         BITS_PER_BYTE * sizeof(*pending),
                                         priority + 1);
        }
+
+       /* Tell the guest about our interrupt status */
+       if (*pending)
+               vcpu->arch.shared->int_pending = 1;
+       else if (old_pending)
+               vcpu->arch.shared->int_pending = 0;
 }
 
 void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
@@ -398,6 +452,25 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
        }
 }
 
+pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+       ulong mp_pa = vcpu->arch.magic_page_pa; /* zero means no magic page override is applied */
+
+       /* Magic page override: a gfn matching magic_page_pa (both masked with KVM_PAM) resolves to the shared page */
+       if (unlikely(mp_pa) &&
+           unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) ==
+                    ((mp_pa & PAGE_MASK) & KVM_PAM))) {
+               ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; /* page-aligned address containing vcpu->arch.shared */
+               pfn_t pfn;
+
+               pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT; /* frame number of that page */
+               get_page(pfn_to_page(pfn)); /* get_page() on the backing struct page before returning its pfn */
+               return pfn;
+       }
+
+       return gfn_to_pfn(vcpu->kvm, gfn); /* all other gfns use the generic lookup */
+}
+
 /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
  * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
  * emulate 32 bytes dcbz length.
@@ -415,8 +488,10 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
        int i;
 
        hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
-       if (is_error_page(hpage))
+       if (is_error_page(hpage)) {
+               kvm_release_page_clean(hpage);
                return;
+       }
 
        hpage_offset = pte->raddr & ~PAGE_MASK;
        hpage_offset &= ~0xFFFULL;
@@ -437,14 +512,14 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
 static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
                         struct kvmppc_pte *pte)
 {
-       int relocated = (vcpu->arch.msr & (data ? MSR_DR : MSR_IR));
+       int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR));
        int r;
 
        if (relocated) {
                r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data);
        } else {
                pte->eaddr = eaddr;
-               pte->raddr = eaddr & 0xffffffff;
+               pte->raddr = eaddr & KVM_PAM;
                pte->vpage = VSID_REAL | eaddr >> 12;
                pte->may_read = true;
                pte->may_write = true;
@@ -533,6 +608,13 @@ mmio:
 
 static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
+       ulong mp_pa = vcpu->arch.magic_page_pa; /* zero means no magic page check is made */
+
+       if (unlikely(mp_pa) &&
+           unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { /* gfn is the frame of the KVM_PAM-masked magic page address */
+               return 1; /* reported visible without consulting kvm_is_visible_gfn() */
+       }
+
        return kvm_is_visible_gfn(vcpu->kvm, gfn);
 }
 
@@ -545,8 +627,8 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        int page_found = 0;
        struct kvmppc_pte pte;
        bool is_mmio = false;
-       bool dr = (vcpu->arch.msr & MSR_DR) ? true : false;
-       bool ir = (vcpu->arch.msr & MSR_IR) ? true : false;
+       bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false;
+       bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false;
        u64 vsid;
 
        relocated = data ? dr : ir;
@@ -558,12 +640,12 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                pte.may_execute = true;
                pte.may_read = true;
                pte.may_write = true;
-               pte.raddr = eaddr & 0xffffffff;
+               pte.raddr = eaddr & KVM_PAM;
                pte.eaddr = eaddr;
                pte.vpage = eaddr >> 12;
        }
 
-       switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
+       switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
        case 0:
                pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
                break;
@@ -571,7 +653,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        case MSR_IR:
                vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
 
-               if ((vcpu->arch.msr & (MSR_DR|MSR_IR)) == MSR_DR)
+               if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR)
                        pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
                else
                        pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
@@ -594,20 +676,23 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        if (page_found == -ENOENT) {
                /* Page not found in guest PTE entries */
-               vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
-               to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr;
-               vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
+               vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+               vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
+               vcpu->arch.shared->msr |=
+                       (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EPERM) {
                /* Storage protection */
-               vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
-               to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
-               to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
-               vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
+               vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+               vcpu->arch.shared->dsisr =
+                       to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
+               vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
+               vcpu->arch.shared->msr |=
+                       (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EINVAL) {
                /* Page not found in guest SLB */
-               vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
+               vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
                kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
        } else if (!is_mmio &&
                   kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
@@ -695,9 +780,11 @@ static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
 
        ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
        if (ret == -ENOENT) {
-               vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1);
-               vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0);
-               vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0);
+               ulong msr = vcpu->arch.shared->msr;
+
+               msr = kvmppc_set_field(msr, 33, 33, 1);
+               msr = kvmppc_set_field(msr, 34, 36, 0);
+               vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0);
                kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
                return EMULATE_AGAIN;
        }
@@ -736,7 +823,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
        if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
                return RESUME_GUEST;
 
-       if (!(vcpu->arch.msr & msr)) {
+       if (!(vcpu->arch.shared->msr & msr)) {
                kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
                return RESUME_GUEST;
        }
@@ -796,16 +883,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        run->exit_reason = KVM_EXIT_UNKNOWN;
        run->ready_for_interrupt_injection = 1;
-#ifdef EXIT_DEBUG
-       printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n",
-               exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu),
-               kvmppc_get_dec(vcpu), to_svcpu(vcpu)->shadow_srr1);
-#elif defined (EXIT_DEBUG_SIMPLE)
-       if ((exit_nr != 0x900) && (exit_nr != 0x500))
-               printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n",
-                       exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu),
-                       vcpu->arch.msr);
-#endif
+
+       trace_kvm_book3s_exit(exit_nr, vcpu);
        kvm_resched(vcpu);
        switch (exit_nr) {
        case BOOK3S_INTERRUPT_INST_STORAGE:
@@ -836,9 +915,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
                        r = RESUME_GUEST;
                } else {
-                       vcpu->arch.msr |= to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
+                       vcpu->arch.shared->msr |=
+                               to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
                        kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
-                       kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
                        r = RESUME_GUEST;
                }
                break;
@@ -861,17 +940,16 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
                        r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
                } else {
-                       vcpu->arch.dear = dar;
-                       to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr;
+                       vcpu->arch.shared->dar = dar;
+                       vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
                        kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
-                       kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFUL);
                        r = RESUME_GUEST;
                }
                break;
        }
        case BOOK3S_INTERRUPT_DATA_SEGMENT:
                if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
-                       vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
+                       vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
                        kvmppc_book3s_queue_irqprio(vcpu,
                                BOOK3S_INTERRUPT_DATA_SEGMENT);
                }
@@ -904,7 +982,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 program_interrupt:
                flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
 
-               if (vcpu->arch.msr & MSR_PR) {
+               if (vcpu->arch.shared->msr & MSR_PR) {
 #ifdef EXIT_DEBUG
                        printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
 #endif
@@ -941,10 +1019,10 @@ program_interrupt:
                break;
        }
        case BOOK3S_INTERRUPT_SYSCALL:
-               // XXX make user settable
                if (vcpu->arch.osi_enabled &&
                    (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
                    (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
+                       /* MOL hypercalls */
                        u64 *gprs = run->osi.gprs;
                        int i;
 
@@ -953,8 +1031,13 @@ program_interrupt:
                                gprs[i] = kvmppc_get_gpr(vcpu, i);
                        vcpu->arch.osi_needed = 1;
                        r = RESUME_HOST_NV;
-
+               } else if (!(vcpu->arch.shared->msr & MSR_PR) &&
+                   (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
+                       /* KVM PV hypercalls */
+                       kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
+                       r = RESUME_GUEST;
                } else {
+                       /* Guest syscalls */
                        vcpu->stat.syscall_exits++;
                        kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
                        r = RESUME_GUEST;
@@ -989,9 +1072,9 @@ program_interrupt:
        }
        case BOOK3S_INTERRUPT_ALIGNMENT:
                if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
-                       to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu,
+                       vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu,
                                kvmppc_get_last_inst(vcpu));
-                       vcpu->arch.dear = kvmppc_alignment_dar(vcpu,
+                       vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu,
                                kvmppc_get_last_inst(vcpu));
                        kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
                }
@@ -1031,9 +1114,7 @@ program_interrupt:
                }
        }
 
-#ifdef EXIT_DEBUG
-       printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, kvmppc_get_pc(vcpu), r);
-#endif
+       trace_kvm_book3s_reenter(r, vcpu);
 
        return r;
 }
@@ -1052,14 +1133,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        regs->ctr = kvmppc_get_ctr(vcpu);
        regs->lr = kvmppc_get_lr(vcpu);
        regs->xer = kvmppc_get_xer(vcpu);
-       regs->msr = vcpu->arch.msr;
-       regs->srr0 = vcpu->arch.srr0;
-       regs->srr1 = vcpu->arch.srr1;
+       regs->msr = vcpu->arch.shared->msr;
+       regs->srr0 = vcpu->arch.shared->srr0;
+       regs->srr1 = vcpu->arch.shared->srr1;
        regs->pid = vcpu->arch.pid;
-       regs->sprg0 = vcpu->arch.sprg0;
-       regs->sprg1 = vcpu->arch.sprg1;
-       regs->sprg2 = vcpu->arch.sprg2;
-       regs->sprg3 = vcpu->arch.sprg3;
+       regs->sprg0 = vcpu->arch.shared->sprg0;
+       regs->sprg1 = vcpu->arch.shared->sprg1;
+       regs->sprg2 = vcpu->arch.shared->sprg2;
+       regs->sprg3 = vcpu->arch.shared->sprg3;
        regs->sprg5 = vcpu->arch.sprg4;
        regs->sprg6 = vcpu->arch.sprg5;
        regs->sprg7 = vcpu->arch.sprg6;
@@ -1080,12 +1161,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        kvmppc_set_lr(vcpu, regs->lr);
        kvmppc_set_xer(vcpu, regs->xer);
        kvmppc_set_msr(vcpu, regs->msr);
-       vcpu->arch.srr0 = regs->srr0;
-       vcpu->arch.srr1 = regs->srr1;
-       vcpu->arch.sprg0 = regs->sprg0;
-       vcpu->arch.sprg1 = regs->sprg1;
-       vcpu->arch.sprg2 = regs->sprg2;
-       vcpu->arch.sprg3 = regs->sprg3;
+       vcpu->arch.shared->srr0 = regs->srr0;
+       vcpu->arch.shared->srr1 = regs->srr1;
+       vcpu->arch.shared->sprg0 = regs->sprg0;
+       vcpu->arch.shared->sprg1 = regs->sprg1;
+       vcpu->arch.shared->sprg2 = regs->sprg2;
+       vcpu->arch.shared->sprg3 = regs->sprg3;
        vcpu->arch.sprg5 = regs->sprg4;
        vcpu->arch.sprg6 = regs->sprg5;
        vcpu->arch.sprg7 = regs->sprg6;
@@ -1111,10 +1192,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                        sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv;
                }
        } else {
-               for (i = 0; i < 16; i++) {
-                       sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
-                       sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
-               }
+               for (i = 0; i < 16; i++)
+                       sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i];
+
                for (i = 0; i < 8; i++) {
                        sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
                        sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
@@ -1225,6 +1305,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
        struct kvmppc_vcpu_book3s *vcpu_book3s;
        struct kvm_vcpu *vcpu;
        int err = -ENOMEM;
+       unsigned long p;
 
        vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s));
        if (!vcpu_book3s)
@@ -1242,6 +1323,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
        if (err)
                goto free_shadow_vcpu;
 
+       p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
+       /* the real shared page fills the last 4k of our page */
+       vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
+       if (!p)
+               goto uninit_vcpu;
+
        vcpu->arch.host_retip = kvm_return_point;
        vcpu->arch.host_msr = mfmsr();
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -1268,10 +1355,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 
        err = kvmppc_mmu_init(vcpu);
        if (err < 0)
-               goto free_shadow_vcpu;
+               goto uninit_vcpu;
 
        return vcpu;
 
+uninit_vcpu:
+       kvm_vcpu_uninit(vcpu);
 free_shadow_vcpu:
        kfree(vcpu_book3s->shadow_vcpu);
 free_vcpu:
@@ -1284,6 +1373,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 
+       free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
        kvm_vcpu_uninit(vcpu);
        kfree(vcpu_book3s->shadow_vcpu);
        vfree(vcpu_book3s);
@@ -1346,7 +1436,7 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        local_irq_enable();
 
        /* Preload FPU if it's enabled */
-       if (vcpu->arch.msr & MSR_FP)
+       if (vcpu->arch.shared->msr & MSR_FP)
                kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 
        ret = __kvmppc_vcpu_entry(kvm_run, vcpu);