Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index c0b48f9..fe14ca3 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
+#include <linux/module.h>
 
-#include "trace.h"
+#include "book3s.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace_pr.h"
 
 /* #define EXIT_DEBUG */
 /* #define DEBUG_EXT */
@@ -56,29 +60,25 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 #define HW_PAGE_SIZE PAGE_SIZE
 #endif
 
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
        struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
        memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
-       memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
-              sizeof(get_paca()->shadow_vcpu));
        svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
        svcpu_put(svcpu);
 #endif
        vcpu->cpu = smp_processor_id();
 #ifdef CONFIG_PPC_BOOK3S_32
-       current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
+       current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu;
 #endif
 }
 
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
        struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
        memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
-       memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
-              sizeof(get_paca()->shadow_vcpu));
        to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
        svcpu_put(svcpu);
 #endif
@@ -87,7 +87,61 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
        vcpu->cpu = -1;
 }
 
-int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+/* Copy data needed by real-mode code from vcpu to shadow vcpu */
+void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
+                         struct kvm_vcpu *vcpu)
+{
+       svcpu->gpr[0] = vcpu->arch.gpr[0];
+       svcpu->gpr[1] = vcpu->arch.gpr[1];
+       svcpu->gpr[2] = vcpu->arch.gpr[2];
+       svcpu->gpr[3] = vcpu->arch.gpr[3];
+       svcpu->gpr[4] = vcpu->arch.gpr[4];
+       svcpu->gpr[5] = vcpu->arch.gpr[5];
+       svcpu->gpr[6] = vcpu->arch.gpr[6];
+       svcpu->gpr[7] = vcpu->arch.gpr[7];
+       svcpu->gpr[8] = vcpu->arch.gpr[8];
+       svcpu->gpr[9] = vcpu->arch.gpr[9];
+       svcpu->gpr[10] = vcpu->arch.gpr[10];
+       svcpu->gpr[11] = vcpu->arch.gpr[11];
+       svcpu->gpr[12] = vcpu->arch.gpr[12];
+       svcpu->gpr[13] = vcpu->arch.gpr[13];
+       svcpu->cr  = vcpu->arch.cr;
+       svcpu->xer = vcpu->arch.xer;
+       svcpu->ctr = vcpu->arch.ctr;
+       svcpu->lr  = vcpu->arch.lr;
+       svcpu->pc  = vcpu->arch.pc;
+}
+
+/* Copy data touched by real-mode code from shadow vcpu back to vcpu */
+void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
+                           struct kvmppc_book3s_shadow_vcpu *svcpu)
+{
+       vcpu->arch.gpr[0] = svcpu->gpr[0];
+       vcpu->arch.gpr[1] = svcpu->gpr[1];
+       vcpu->arch.gpr[2] = svcpu->gpr[2];
+       vcpu->arch.gpr[3] = svcpu->gpr[3];
+       vcpu->arch.gpr[4] = svcpu->gpr[4];
+       vcpu->arch.gpr[5] = svcpu->gpr[5];
+       vcpu->arch.gpr[6] = svcpu->gpr[6];
+       vcpu->arch.gpr[7] = svcpu->gpr[7];
+       vcpu->arch.gpr[8] = svcpu->gpr[8];
+       vcpu->arch.gpr[9] = svcpu->gpr[9];
+       vcpu->arch.gpr[10] = svcpu->gpr[10];
+       vcpu->arch.gpr[11] = svcpu->gpr[11];
+       vcpu->arch.gpr[12] = svcpu->gpr[12];
+       vcpu->arch.gpr[13] = svcpu->gpr[13];
+       vcpu->arch.cr  = svcpu->cr;
+       vcpu->arch.xer = svcpu->xer;
+       vcpu->arch.ctr = svcpu->ctr;
+       vcpu->arch.lr  = svcpu->lr;
+       vcpu->arch.pc  = svcpu->pc;
+       vcpu->arch.shadow_srr1 = svcpu->shadow_srr1;
+       vcpu->arch.fault_dar   = svcpu->fault_dar;
+       vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
+       vcpu->arch.last_inst   = svcpu->last_inst;
+}
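
/*
 * The two helpers above replace the shadow_vcpu memcpys removed from
 * vcpu_load/vcpu_put: volatile state is now copied field by field
 * around guest entry and exit instead.  As a minimal sketch (the
 * helper name is illustrative, not part of the patch), the fourteen
 * GPR assignments could fold into a loop; only r0-r13 are mirrored,
 * on the assumption that the real-mode code never touches the
 * non-volatile r14-r31, which stay in the vcpu struct.
 */
static void sketch_copy_gprs(struct kvmppc_book3s_shadow_vcpu *svcpu,
                             struct kvm_vcpu *vcpu)
{
        int i;

        for (i = 0; i < 14; i++)
                svcpu->gpr[i] = vcpu->arch.gpr[i];
}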
+
+static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
 {
        int r = 1; /* Indicate we want to get back into the guest */
 
@@ -100,44 +154,69 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 }
 
 /************* MMU Notifiers *************/
+static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
+                            unsigned long end)
+{
+       long i;
+       struct kvm_vcpu *vcpu;
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *memslot;
+
+       slots = kvm_memslots(kvm);
+       kvm_for_each_memslot(memslot, slots) {
+               unsigned long hva_start, hva_end;
+               gfn_t gfn, gfn_end;
+
+               hva_start = max(start, memslot->userspace_addr);
+               hva_end = min(end, memslot->userspace_addr +
+                                       (memslot->npages << PAGE_SHIFT));
+               if (hva_start >= hva_end)
+                       continue;
+               /*
+                * {gfn(page) | page intersects with [hva_start, hva_end)} =
+                * {gfn, gfn+1, ..., gfn_end-1}.
+                */
+               gfn = hva_to_gfn_memslot(hva_start, memslot);
+               gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+               kvm_for_each_vcpu(i, vcpu, kvm)
+                       kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT,
+                                             gfn_end << PAGE_SHIFT);
+       }
+}
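
/*
 * Worked example of the clamp-and-convert arithmetic above, with
 * assumed values: a memslot with userspace_addr = 0x10000000,
 * npages = 16 and base_gfn = 0x40, and an unmap request covering
 * [0x10001000, 0x10003000):
 *
 *   hva_start = max(0x10001000, 0x10000000)              = 0x10001000
 *   hva_end   = min(0x10003000, 0x10000000 + 16 * 4096)  = 0x10003000
 *   gfn       = 0x40 + ((0x10001000 - 0x10000000) >> 12) = 0x41
 *   gfn_end   = 0x40 + ((0x10003fff - 0x10000000) >> 12) = 0x43
 *
 * so every vcpu flushes shadow PTEs for guest-physical addresses
 * [0x41000, 0x43000), exactly the two pages that intersect the
 * request.
 */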
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
 {
        trace_kvm_unmap_hva(hva);
 
-       /*
-        * Flush all shadow tlb entries everywhere. This is slow, but
-        * we are 100% sure that we catch the to be unmapped page
-        */
-       kvm_flush_remote_tlbs(kvm);
+       do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
 
        return 0;
 }
 
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
+                                 unsigned long end)
 {
-       /* kvm_unmap_hva flushes everything anyways */
-       kvm_unmap_hva(kvm, start);
+       do_kvm_unmap_hva(kvm, start, end);
 
        return 0;
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)
 {
        /* XXX could be more clever ;) */
        return 0;
 }
 
-int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva)
 {
        /* XXX could be more clever ;) */
        return 0;
 }
 
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
        /* The page will get remapped properly on its next fault */
-       kvm_unmap_hva(kvm, hva);
+       do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
 }
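
/*
 * All of the notifier hooks above now funnel through
 * do_kvm_unmap_hva(); a single-page and a range invalidation differ
 * only in the bounds passed down (addresses assumed for
 * illustration):
 *
 *   kvm_unmap_hva_pr(kvm, 0x10002000)
 *       -> do_kvm_unmap_hva(kvm, 0x10002000, 0x10003000)
 *   kvm_unmap_hva_range_pr(kvm, 0x10002000, 0x10005000)
 *       -> do_kvm_unmap_hva(kvm, 0x10002000, 0x10005000)
 */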
 
 /*****************************************/
@@ -159,7 +238,7 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
        vcpu->arch.shadow_msr = smsr;
 }
 
-void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 {
        ulong old_msr = vcpu->arch.shared->msr;
 
@@ -219,7 +298,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
                kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 }
 
-void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
+void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
 {
        u32 host_pvr;
 
@@ -256,6 +335,23 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
        if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
                to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
 
+       /*
+        * If the guest asks for POWER6 or later, set the flag
+        * indicating that we can do multiple large page sizes
+        * and 1TB segments.
+        * Also set the flag indicating that tlbie takes the large
+        * page bit in the RB operand rather than in the instruction.
+        */
+       switch (PVR_VER(pvr)) {
+       case PVR_POWER6:
+       case PVR_POWER7:
+       case PVR_POWER7p:
+       case PVR_POWER8:
+               vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
+                       BOOK3S_HFLAG_NEW_TLBIE;
+               break;
+       }
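
/*
 * PVR_VER() takes the top 16 bits of the PVR; e.g. an assumed POWER7
 * PVR of 0x003f0201 gives PVR_VER() == 0x003f == PVR_POWER7, so both
 * BOOK3S_HFLAG_MULTI_PGSIZE and BOOK3S_HFLAG_NEW_TLBIE get set for
 * it (values illustrative).
 */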
+
 #ifdef CONFIG_PPC_BOOK3S_32
        /* 32 bit Book3S always has 32 byte dcbz */
        vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
@@ -334,6 +430,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                            ulong eaddr, int vec)
 {
        bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
+       bool iswrite = false;
        int r = RESUME_GUEST;
        int relocated;
        int page_found = 0;
@@ -344,10 +441,12 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        u64 vsid;
 
        relocated = data ? dr : ir;
+       if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE))
+               iswrite = true;
 
        /* Resolve real address if translation turned on */
        if (relocated) {
-               page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
+               page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite);
        } else {
                pte.may_execute = true;
                pte.may_read = true;
@@ -355,6 +454,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                pte.raddr = eaddr & KVM_PAM;
                pte.eaddr = eaddr;
                pte.vpage = eaddr >> 12;
+               pte.page_size = MMU_PAGE_64K;
        }
 
        switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
@@ -388,22 +488,18 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        if (page_found == -ENOENT) {
                /* Page not found in guest PTE entries */
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
                vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
-               vcpu->arch.shared->dsisr = svcpu->fault_dsisr;
+               vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr;
                vcpu->arch.shared->msr |=
-                       (svcpu->shadow_srr1 & 0x00000000f8000000ULL);
-               svcpu_put(svcpu);
+                       vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EPERM) {
                /* Storage protection */
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
                vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
-               vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE;
+               vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
                vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
                vcpu->arch.shared->msr |=
-                       svcpu->shadow_srr1 & 0x00000000f8000000ULL;
-               svcpu_put(svcpu);
+                       vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EINVAL) {
                /* Page not found in guest SLB */
@@ -411,12 +507,20 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
        } else if (!is_mmio &&
                   kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
+               if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
+                       /*
+                        * There is already a host HPTE there, presumably
+                        * a read-only one for a page the guest thinks
+                        * is writable, so get rid of it first.
+                        */
+                       kvmppc_mmu_unmap_page(vcpu, &pte);
+               }
                /* The guest's PTE is not mapped yet. Map on the host */
-               kvmppc_mmu_map_page(vcpu, &pte);
+               kvmppc_mmu_map_page(vcpu, &pte, iswrite);
                if (data)
                        vcpu->stat.sp_storage++;
                else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
-                       (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
+                        (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
                        kvmppc_patch_dcbz(vcpu, &pte);
        } else {
                /* MMIO */
@@ -619,13 +723,15 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
 
        if (lost_ext & MSR_FP)
                kvmppc_load_up_fpu();
+#ifdef CONFIG_ALTIVEC
        if (lost_ext & MSR_VEC)
                kvmppc_load_up_altivec();
+#endif
        current->thread.regs->msr |= lost_ext;
 }
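
/*
 * kvmppc_load_up_altivec() is only built when CONFIG_ALTIVEC is
 * enabled, hence the new guard around the MSR_VEC case; the MSR_FP
 * path stays unconditional since the FPU is always present on these
 * Book3S CPUs.
 */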
 
-int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                       unsigned int exit_nr)
+int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                         unsigned int exit_nr)
 {
        int r = RESUME_HOST;
        int s;
@@ -643,25 +749,32 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
        switch (exit_nr) {
        case BOOK3S_INTERRUPT_INST_STORAGE:
        {
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-               ulong shadow_srr1 = svcpu->shadow_srr1;
+               ulong shadow_srr1 = vcpu->arch.shadow_srr1;
                vcpu->stat.pf_instruc++;
 
 #ifdef CONFIG_PPC_BOOK3S_32
                /* We mark segments as unused when invalidating them, so
                 * treat the respective fault as a segment fault. */
-               if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) {
-                       kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
-                       r = RESUME_GUEST;
+               {
+                       struct kvmppc_book3s_shadow_vcpu *svcpu;
+                       u32 sr;
+
+                       svcpu = svcpu_get(vcpu);
+                       sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT];
                        svcpu_put(svcpu);
-                       break;
+                       if (sr == SR_INVALID) {
+                               kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+                               r = RESUME_GUEST;
+                               break;
+                       }
                }
 #endif
-               svcpu_put(svcpu);
 
                /* only care about PTEG not found errors, but leave NX alone */
                if (shadow_srr1 & 0x40000000) {
+                       int idx = srcu_read_lock(&vcpu->kvm->srcu);
                        r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
+                       srcu_read_unlock(&vcpu->kvm->srcu, idx);
                        vcpu->stat.sp_instruc++;
                } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
                          (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
@@ -682,25 +795,36 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
        case BOOK3S_INTERRUPT_DATA_STORAGE:
        {
                ulong dar = kvmppc_get_fault_dar(vcpu);
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-               u32 fault_dsisr = svcpu->fault_dsisr;
+               u32 fault_dsisr = vcpu->arch.fault_dsisr;
                vcpu->stat.pf_storage++;
 
 #ifdef CONFIG_PPC_BOOK3S_32
                /* We mark segments as unused when invalidating them, so
                 * treat the respective fault as a segment fault. */
-               if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) {
-                       kvmppc_mmu_map_segment(vcpu, dar);
-                       r = RESUME_GUEST;
+               {
+                       struct kvmppc_book3s_shadow_vcpu *svcpu;
+                       u32 sr;
+
+                       svcpu = svcpu_get(vcpu);
+                       sr = svcpu->sr[dar >> SID_SHIFT];
                        svcpu_put(svcpu);
-                       break;
+                       if (sr == SR_INVALID) {
+                               kvmppc_mmu_map_segment(vcpu, dar);
+                               r = RESUME_GUEST;
+                               break;
+                       }
                }
 #endif
-               svcpu_put(svcpu);
 
-               /* The only case we need to handle is missing shadow PTEs */
-               if (fault_dsisr & DSISR_NOHPTE) {
+               /*
+                * We need to handle missing shadow PTEs, and
+                * protection faults due to us mapping a page read-only
+                * when the guest thinks it is writable.
+                */
+               if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) {
+                       int idx = srcu_read_lock(&vcpu->kvm->srcu);
                        r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
+                       srcu_read_unlock(&vcpu->kvm->srcu, idx);
                } else {
                        vcpu->arch.shared->dar = dar;
                        vcpu->arch.shared->dsisr = fault_dsisr;
@@ -743,13 +867,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
        case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
        {
                enum emulation_result er;
-               struct kvmppc_book3s_shadow_vcpu *svcpu;
                ulong flags;
 
 program_interrupt:
-               svcpu = svcpu_get(vcpu);
-               flags = svcpu->shadow_srr1 & 0x1f0000ull;
-               svcpu_put(svcpu);
+               flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
 
                if (vcpu->arch.shared->msr & MSR_PR) {
 #ifdef EXIT_DEBUG
@@ -798,7 +919,7 @@ program_interrupt:
                        ulong cmd = kvmppc_get_gpr(vcpu, 3);
                        int i;
 
-#ifdef CONFIG_KVM_BOOK3S_64_PR
+#ifdef CONFIG_PPC_BOOK3S_64
                        if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
                                r = RESUME_GUEST;
                                break;
@@ -881,9 +1002,7 @@ program_interrupt:
                break;
        default:
        {
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-               ulong shadow_srr1 = svcpu->shadow_srr1;
-               svcpu_put(svcpu);
+               ulong shadow_srr1 = vcpu->arch.shadow_srr1;
                /* Ugh - bork here! What did we get? */
                printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
                        exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
@@ -920,8 +1039,8 @@ program_interrupt:
        return r;
 }
 
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-                                  struct kvm_sregs *sregs)
+static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu,
+                                           struct kvm_sregs *sregs)
 {
        struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
        int i;
@@ -947,13 +1066,13 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
        return 0;
 }
 
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-                                  struct kvm_sregs *sregs)
+static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu,
+                                           struct kvm_sregs *sregs)
 {
        struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
        int i;
 
-       kvmppc_set_pvr(vcpu, sregs->pvr);
+       kvmppc_set_pvr_pr(vcpu, sregs->pvr);
 
        vcpu3s->sdr1 = sregs->u.s.sdr1;
        if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
@@ -983,7 +1102,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        return 0;
 }
 
-int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
+                                union kvmppc_one_reg *val)
 {
        int r = 0;
 
@@ -1012,7 +1132,8 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
        return r;
 }
 
-int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
+                                union kvmppc_one_reg *val)
 {
        int r = 0;
 
@@ -1042,28 +1163,30 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
        return r;
 }
 
-int kvmppc_core_check_processor_compat(void)
-{
-       return 0;
-}
-
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
+                                                  unsigned int id)
 {
        struct kvmppc_vcpu_book3s *vcpu_book3s;
        struct kvm_vcpu *vcpu;
        int err = -ENOMEM;
        unsigned long p;
 
-       vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
-       if (!vcpu_book3s)
+       vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+       if (!vcpu)
                goto out;
 
-       vcpu_book3s->shadow_vcpu =
-               kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
-       if (!vcpu_book3s->shadow_vcpu)
+       vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
+       if (!vcpu_book3s)
                goto free_vcpu;
+       vcpu->arch.book3s = vcpu_book3s;
+
+#ifdef CONFIG_KVM_BOOK3S_32
+       vcpu->arch.shadow_vcpu =
+               kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL);
+       if (!vcpu->arch.shadow_vcpu)
+               goto free_vcpu3s;
+#endif
 
-       vcpu = &vcpu_book3s->vcpu;
        err = kvm_vcpu_init(vcpu, kvm, id);
        if (err)
                goto free_shadow_vcpu;
@@ -1076,13 +1199,19 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
        vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
 
 #ifdef CONFIG_PPC_BOOK3S_64
-       /* default to book3s_64 (970fx) */
+       /*
+        * Default to the same as the host if we're on a machine
+        * recent enough to have 1TB segments;
+        * otherwise default to PPC970FX.
+        */
        vcpu->arch.pvr = 0x3C0301;
+       if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+               vcpu->arch.pvr = mfspr(SPRN_PVR);
 #else
        /* default to book3s_32 (750) */
        vcpu->arch.pvr = 0x84202;
 #endif
-       kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+       kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
        vcpu->arch.slb_nr = 64;
 
        vcpu->arch.shadow_msr = MSR_USER64;
@@ -1096,24 +1225,31 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 uninit_vcpu:
        kvm_vcpu_uninit(vcpu);
 free_shadow_vcpu:
-       kfree(vcpu_book3s->shadow_vcpu);
-free_vcpu:
+#ifdef CONFIG_KVM_BOOK3S_32
+       kfree(vcpu->arch.shadow_vcpu);
+free_vcpu3s:
+#endif
        vfree(vcpu_book3s);
+free_vcpu:
+       kmem_cache_free(kvm_vcpu_cache, vcpu);
 out:
        return ERR_PTR(err);
 }
 
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 
        free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
        kvm_vcpu_uninit(vcpu);
-       kfree(vcpu_book3s->shadow_vcpu);
+#ifdef CONFIG_KVM_BOOK3S_32
+       kfree(vcpu->arch.shadow_vcpu);
+#endif
        vfree(vcpu_book3s);
+       kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
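
/*
 * Teardown mirrors kvmppc_core_vcpu_create_pr() in reverse order:
 * the shared page, kvm_vcpu_uninit(), the 32-bit shadow_vcpu, the
 * vzalloc'd book3s struct, and finally the vcpu itself back to
 * kvm_vcpu_cache.
 */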
 
-int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
        int ret;
        struct thread_fp_state fp;
@@ -1216,8 +1352,8 @@ out:
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
-                                     struct kvm_dirty_log *log)
+static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
+                                        struct kvm_dirty_log *log)
 {
        struct kvm_memory_slot *memslot;
        struct kvm_vcpu *vcpu;
@@ -1252,67 +1388,100 @@ out:
        return r;
 }
 
-#ifdef CONFIG_PPC64
-int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
+                                        struct kvm_memory_slot *memslot)
 {
-       info->flags = KVM_PPC_1T_SEGMENTS;
-
-       /* SLB is always 64 entries */
-       info->slb_size = 64;
-
-       /* Standard 4k base page size segment */
-       info->sps[0].page_shift = 12;
-       info->sps[0].slb_enc = 0;
-       info->sps[0].enc[0].page_shift = 12;
-       info->sps[0].enc[0].pte_enc = 0;
-
-       /* Standard 16M large page size segment */
-       info->sps[1].page_shift = 24;
-       info->sps[1].slb_enc = SLB_VSID_L;
-       info->sps[1].enc[0].page_shift = 24;
-       info->sps[1].enc[0].pte_enc = 0;
+       return;
+}
 
+static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
+                                       struct kvm_memory_slot *memslot,
+                                       struct kvm_userspace_memory_region *mem)
+{
        return 0;
 }
-#endif /* CONFIG_PPC64 */
 
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
-                             struct kvm_memory_slot *dont)
+static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
+                               struct kvm_userspace_memory_region *mem,
+                               const struct kvm_memory_slot *old)
 {
+       return;
 }
 
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
-                              unsigned long npages)
+static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free,
+                                       struct kvm_memory_slot *dont)
 {
-       return 0;
+       return;
 }
 
-int kvmppc_core_prepare_memory_region(struct kvm *kvm,
-                                     struct kvm_memory_slot *memslot,
-                                     struct kvm_userspace_memory_region *mem)
+static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot,
+                                        unsigned long npages)
 {
        return 0;
 }
 
-void kvmppc_core_commit_memory_region(struct kvm *kvm,
-                               struct kvm_userspace_memory_region *mem,
-                               const struct kvm_memory_slot *old)
+
+#ifdef CONFIG_PPC64
+static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
+                                        struct kvm_ppc_smmu_info *info)
 {
-}
+       long int i;
+       struct kvm_vcpu *vcpu;
+
+       info->flags = 0;
+
+       /* SLB is always 64 entries */
+       info->slb_size = 64;
+
+       /* Standard 4k base page size segment */
+       info->sps[0].page_shift = 12;
+       info->sps[0].slb_enc = 0;
+       info->sps[0].enc[0].page_shift = 12;
+       info->sps[0].enc[0].pte_enc = 0;
+
+       /*
+        * 64k large page size.
+        * We only want to advertise this if the CPU we're emulating
+        * supports it, but unfortunately we don't have a vcpu easily
+        * to hand here to test.  Just pick the first vcpu, and if
+        * that doesn't exist yet, report the minimum capability,
+        * i.e., no 64k pages.
+        * 1T segment support goes along with 64k pages.
+        */
+       i = 1;
+       vcpu = kvm_get_vcpu(kvm, 0);
+       if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+               info->flags = KVM_PPC_1T_SEGMENTS;
+               info->sps[i].page_shift = 16;
+               info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01;
+               info->sps[i].enc[0].page_shift = 16;
+               info->sps[i].enc[0].pte_enc = 1;
+               ++i;
+       }
+
+       /* Standard 16M large page size segment */
+       info->sps[i].page_shift = 24;
+       info->sps[i].slb_enc = SLB_VSID_L;
+       info->sps[i].enc[0].page_shift = 24;
+       info->sps[i].enc[0].pte_enc = 0;
 
-void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+       return 0;
+}
+#else
+static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
+                                        struct kvm_ppc_smmu_info *info)
 {
+       /* We should not get called */
+       BUG();
 }
+#endif /* CONFIG_PPC64 */
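
/*
 * Resulting table shapes (illustrative): with a first vcpu that has
 * BOOK3S_HFLAG_MULTI_PGSIZE set, userspace sees
 *   sps[0]: 4k  base page   (slb_enc = 0)
 *   sps[1]: 64k base page   (slb_enc = SLB_VSID_L | SLB_VSID_LP_01)
 *   sps[2]: 16M large page  (slb_enc = SLB_VSID_L)
 * plus KVM_PPC_1T_SEGMENTS; otherwise the 64k row is omitted and the
 * 16M row moves up to sps[1].
 */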
 
 static unsigned int kvm_global_user_count = 0;
 static DEFINE_SPINLOCK(kvm_global_user_count_lock);
 
-int kvmppc_core_init_vm(struct kvm *kvm)
+static int kvmppc_core_init_vm_pr(struct kvm *kvm)
 {
-#ifdef CONFIG_PPC64
-       INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
-       INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
-#endif
+       mutex_init(&kvm->arch.hpt_mutex);
 
        if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
                spin_lock(&kvm_global_user_count_lock);
@@ -1323,7 +1492,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
        return 0;
 }
 
-void kvmppc_core_destroy_vm(struct kvm *kvm)
+static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
 {
 #ifdef CONFIG_PPC64
        WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
@@ -1338,26 +1507,81 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
        }
 }
 
-static int kvmppc_book3s_init(void)
+static int kvmppc_core_check_processor_compat_pr(void)
 {
-       int r;
+       /* we are always compatible */
+       return 0;
+}
 
-       r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
-                    THIS_MODULE);
+static long kvm_arch_vm_ioctl_pr(struct file *filp,
+                                unsigned int ioctl, unsigned long arg)
+{
+       return -ENOTTY;
+}
 
-       if (r)
+static struct kvmppc_ops kvm_ops_pr = {
+       .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
+       .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
+       .get_one_reg = kvmppc_get_one_reg_pr,
+       .set_one_reg = kvmppc_set_one_reg_pr,
+       .vcpu_load   = kvmppc_core_vcpu_load_pr,
+       .vcpu_put    = kvmppc_core_vcpu_put_pr,
+       .set_msr     = kvmppc_set_msr_pr,
+       .vcpu_run    = kvmppc_vcpu_run_pr,
+       .vcpu_create = kvmppc_core_vcpu_create_pr,
+       .vcpu_free   = kvmppc_core_vcpu_free_pr,
+       .check_requests = kvmppc_core_check_requests_pr,
+       .get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr,
+       .flush_memslot = kvmppc_core_flush_memslot_pr,
+       .prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
+       .commit_memory_region = kvmppc_core_commit_memory_region_pr,
+       .unmap_hva = kvm_unmap_hva_pr,
+       .unmap_hva_range = kvm_unmap_hva_range_pr,
+       .age_hva  = kvm_age_hva_pr,
+       .test_age_hva = kvm_test_age_hva_pr,
+       .set_spte_hva = kvm_set_spte_hva_pr,
+       .mmu_destroy  = kvmppc_mmu_destroy_pr,
+       .free_memslot = kvmppc_core_free_memslot_pr,
+       .create_memslot = kvmppc_core_create_memslot_pr,
+       .init_vm = kvmppc_core_init_vm_pr,
+       .destroy_vm = kvmppc_core_destroy_vm_pr,
+       .get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
+       .emulate_op = kvmppc_core_emulate_op_pr,
+       .emulate_mtspr = kvmppc_core_emulate_mtspr_pr,
+       .emulate_mfspr = kvmppc_core_emulate_mfspr_pr,
+       .fast_vcpu_kick = kvm_vcpu_kick,
+       .arch_vm_ioctl  = kvm_arch_vm_ioctl_pr,
+};
+
+
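/*
 * Minimal sketch of how the table is consumed: generic code reaches
 * these handlers indirectly once kvmppc_pr_ops is published by
 * kvmppc_book3s_init_pr() below.  The dispatch helper here is
 * hypothetical; the real call sites live in the generic powerpc KVM
 * code, not in this file.
 */
static int sketch_check_requests(struct kvm_vcpu *vcpu)
{
        return kvmppc_pr_ops->check_requests(vcpu);
}
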
+int kvmppc_book3s_init_pr(void)
+{
+       int r;
+
+       r = kvmppc_core_check_processor_compat_pr();
+       if (r < 0)
                return r;
 
-       r = kvmppc_mmu_hpte_sysinit();
+       kvm_ops_pr.owner = THIS_MODULE;
+       kvmppc_pr_ops = &kvm_ops_pr;
 
+       r = kvmppc_mmu_hpte_sysinit();
        return r;
 }
 
-static void kvmppc_book3s_exit(void)
+void kvmppc_book3s_exit_pr(void)
 {
+       kvmppc_pr_ops = NULL;
        kvmppc_mmu_hpte_sysexit();
-       kvm_exit();
 }
 
-module_init(kvmppc_book3s_init);
-module_exit(kvmppc_book3s_exit);
+/*
+ * We only support separate modules for book3s 64
+ */
+#ifdef CONFIG_PPC_BOOK3S_64
+
+module_init(kvmppc_book3s_init_pr);
+module_exit(kvmppc_book3s_exit_pr);
+
+MODULE_LICENSE("GPL");
+#endif