#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/export.h>

#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/switch_to.h>
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
        local_paca->kvm_hstate.kvm_vcpu = vcpu;
        local_paca->kvm_hstate.kvm_vcore = vc;
        vcpu->arch.shregs.msr = msr;
        kvmppc_end_cede(vcpu);
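/*
 * Debug dump of the guest state: PC/MSR/trap, all 32 GPRs, SRR0/1,
 * the SPRGs, CR/XER/DSISR, the faulting DAR/DSISR, the guest SLB and
 * the VM-wide LPCR/SDR1 values, all printed via pr_err().
 */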
        pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
        for (r = 0; r < 16; ++r)
                pr_err("r%2d = %.16lx r%d = %.16lx\n",
                       r, kvmppc_get_gpr(vcpu, r),
                       r+16, kvmppc_get_gpr(vcpu, r+16));
        pr_err("ctr = %.16lx lr = %.16lx\n",
        pr_err("srr0 = %.16llx srr1 = %.16llx\n",
               vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
        pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
               vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
        pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
               vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
        pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
               vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
        pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
        pr_err("fault dar = %.16lx dsisr = %.8x\n",
               vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
        pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
        for (r = 0; r < vcpu->arch.slb_max; ++r)
                pr_err(" ESID = %.16llx VSID = %.16llx\n",
                       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
        pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
               vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
               vcpu->arch.last_inst);
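/*
 * On registration the VPA is flagged as running on a shared processor
 * and its yield count is seeded to 1; an odd yield count is presumably
 * what the guest expects for a currently-dispatched vcpu.
 */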
static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
        vpa->shared_proc = 1;
        vpa->yield_count = 1;
static int vpa_is_registered(struct kvmppc_vpa *vpap)
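/*
 * H_REGISTER_VPA handler: registers or deregisters the VPA, dispatch
 * trace log (DTL) and SLB shadow buffer of the target vcpu. Lengths
 * are validated, a DTL or SLB shadow may only be registered while a
 * VPA is registered, and a VPA may not be deregistered while either
 * of them is still registered; all of this is done under the target
 * vcpu's vpa_update_lock.
 */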
static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
                                       unsigned long vcpuid, unsigned long vpa)
        unsigned long len, nb;

        subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
        if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
            subfunc == H_VPA_REG_SLB) {
                if (subfunc == H_VPA_REG_VPA)
                        len = ((struct reg_vpa *)va)->length.hword;
                len = ((struct reg_vpa *)va)->length.word;

                if (len > nb || len < sizeof(struct reg_vpa))

        spin_lock(&tvcpu->arch.vpa_update_lock);

                if (len < sizeof(struct lppaca))
                vpap = &tvcpu->arch.vpa;
                if (len < sizeof(struct dtl_entry))
                len -= len % sizeof(struct dtl_entry);
                if (!vpa_is_registered(&tvcpu->arch.vpa))
                vpap = &tvcpu->arch.dtl;
                if (!vpa_is_registered(&tvcpu->arch.vpa))
                vpap = &tvcpu->arch.slb_shadow;
        case H_VPA_DEREG_VPA:
                if (vpa_is_registered(&tvcpu->arch.dtl) ||
                    vpa_is_registered(&tvcpu->arch.slb_shadow))
                vpap = &tvcpu->arch.vpa;
        case H_VPA_DEREG_DTL:
                vpap = &tvcpu->arch.dtl;
        case H_VPA_DEREG_SLB:
                vpap = &tvcpu->arch.slb_shadow;

        spin_unlock(&tvcpu->arch.vpa_update_lock);
        struct kvm *kvm = vcpu->kvm;

        spin_unlock(&vcpu->arch.vpa_update_lock);
        spin_lock(&vcpu->arch.vpa_update_lock);
        if (va && nb < vpap->len) {
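/*
 * Apply any pending VPA, DTL or SLB-shadow registration updates for
 * this vcpu under vpa_update_lock; a freshly pinned VPA is
 * reinitialised and the DTL pointer and index are reset.
 */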
static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
        spin_lock(&vcpu->arch.vpa_update_lock);
        if (vcpu->arch.vpa.update_pending) {
                kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
                init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
        if (vcpu->arch.dtl.update_pending) {
                kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
                vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
                vcpu->arch.dtl_index = 0;
        if (vcpu->arch.slb_shadow.update_pending)
                kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
        spin_unlock(&vcpu->arch.vpa_update_lock);
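/*
 * Write one dispatch trace log entry: dispatch reason, physical
 * processor id (core + thread), current timebase, stolen time since
 * the last entry, and the guest PC/MSR. The DTL is used as a ring
 * buffer and vpa->dtl_idx is bumped so the guest can see the entry.
 */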
static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
        struct dtl_entry *dt;
        unsigned long old_stolen;

        dt = vcpu->arch.dtl_ptr;
        vpa = vcpu->arch.vpa.pinned_addr;
        old_stolen = vcpu->arch.stolen_logged;

        memset(dt, 0, sizeof(struct dtl_entry));
        dt->dispatch_reason = 7;
        dt->processor_id = vc->pcpu + vcpu->arch.ptid;
        dt->timebase = mftb();
        dt->enqueue_to_dispatch_time = vc->stolen_tb - old_stolen;
        dt->srr0 = kvmppc_get_pc(vcpu);
        dt->srr1 = vcpu->arch.shregs.msr;
        if (dt == vcpu->arch.dtl.pinned_end)
                dt = vcpu->arch.dtl.pinned_addr;
        vcpu->arch.dtl_ptr = dt;
        vpa->dtl_idx = ++vcpu->arch.dtl_index;
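/*
 * Hypercall dispatch: the hcall number comes in r3 and its arguments
 * in r4 and up; the return value goes back to the guest in r3 and
 * hcall_needed is cleared. The cases here include waking a ceded
 * target vcpu (H_PROD-style handling) and forwarding H_REGISTER_VPA
 * to do_h_register_vpa().
 */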
        unsigned long req = kvmppc_get_gpr(vcpu, 3);

                                        kvmppc_get_gpr(vcpu, 5),
                                        kvmppc_get_gpr(vcpu, 6),
                                        kvmppc_get_gpr(vcpu, 7));
                target = kvmppc_get_gpr(vcpu, 4);
                tvcpu->arch.prodded = 1;
                if (vcpu->arch.ceded) {
                        if (waitqueue_active(&vcpu->wq)) {
                                vcpu->stat.halt_wakeup++;
                ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
                                        kvmppc_get_gpr(vcpu, 5),
                                        kvmppc_get_gpr(vcpu, 6));
        kvmppc_set_gpr(vcpu, 3, ret);
        vcpu->arch.hcall_needed = 0;
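/*
 * Guest exit handling: exit statistics are updated and the hardware
 * trap number is switched on. Decrementer and external-interrupt
 * exits are just counted, program-interrupt flags are taken from the
 * SRR1 bits of the guest MSR, hypercalls issued with MSR_PR set are
 * rejected, and valid hypercalls are passed to userspace through
 * run->papr_hcall with up to nine arguments from r4 onwards.
 */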
        vcpu->stat.sum_exits++;

        switch (vcpu->arch.trap) {
                vcpu->stat.dec_exits++;
                vcpu->stat.ext_intr_exits++;
                flags = vcpu->arch.shregs.msr & 0x1f0000ull;
                if (vcpu->arch.shregs.msr & MSR_PR) {
                for (i = 0; i < 9; ++i)
                        run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
                vcpu->arch.hcall_needed = 1;
                        vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
                        kvmppc_get_pc(vcpu), 0);
                        vcpu->arch.trap, kvmppc_get_pc(vcpu),
                        vcpu->arch.shregs.msr);
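/*
 * get/set_sregs: guest SLB entries are copied to and from
 * sregs->u.s.ppc64.slb[]; on the set side only valid entries are kept
 * and slb_max is updated to the number copied in.
 */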
        for (i = 0; i < vcpu->arch.slb_max; i++) {
                sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
                sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;

        for (i = 0; i < vcpu->arch.slb_nr; i++) {
                vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
                vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
        vcpu->arch.slb_max = j;
        if (!r && (hior != 0))
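/*
 * New-vcpu defaults: the shared register area points at shregs, no
 * last physical CPU yet, performance counters frozen (MMCR0_FC) and
 * the runlatch set; the vcpu is then attached to its virtual core
 * under the vcore lock.
 */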
        vcpu->arch.shared = &vcpu->arch.shregs;
        vcpu->arch.last_cpu = -1;
        vcpu->arch.mmcr[0] = MMCR0_FC;
        vcpu->arch.ctrl = CTRL_RUNLATCH;

        spin_lock(&vcore->lock);
        spin_unlock(&vcore->lock);
        vcpu->arch.vcore = vcore;
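/*
 * On vcpu teardown any registered DTL, SLB shadow and VPA are
 * unpinned under vpa_update_lock.
 */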
        spin_lock(&vcpu->arch.vpa_update_lock);
        if (vcpu->arch.dtl.pinned_addr)
        if (vcpu->arch.slb_shadow.pinned_addr)
        if (vcpu->arch.vpa.pinned_addr)
        spin_unlock(&vcpu->arch.vpa_update_lock);
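/*
 * Decrementer handling for a ceded vcpu: kvmppc_set_timer arms a
 * timer for the time until dec_expires (waking the vcpu immediately
 * if the decrementer has already expired) and sets timer_running;
 * kvmppc_end_cede clears the ceded state and cancels a running timer.
 */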
static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
        unsigned long dec_nsec, now;

        if (now > vcpu->arch.dec_expires) {
        vcpu->arch.timer_running = 1;

static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
        vcpu->arch.ceded = 0;
        if (vcpu->arch.timer_running) {
                vcpu->arch.timer_running = 0;
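/*
 * Hardware-thread management: kvmppc_grab_hwthread asks a sibling
 * thread, via the hwthread_req flag in its PACA, to enter the KVM
 * code, polling with a bounded timeout and logging an error on
 * failure; kvmppc_release_hwthread clears the request and the vcpu
 * pointer again.
 */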
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,

static int kvmppc_grab_hwthread(int cpu)
        struct paca_struct *tpaca;

        tpaca->kvm_hstate.hwthread_req = 1;
                if (--timeout <= 0) {
                        pr_err("KVM: couldn't grab cpu %d\n", cpu);

static void kvmppc_release_hwthread(int cpu)
        struct paca_struct *tpaca;

        tpaca->kvm_hstate.hwthread_req = 0;
        tpaca->kvm_hstate.kvm_vcpu = NULL;
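/*
 * kvmppc_start_thread loads the target thread's PACA with the vcpu
 * and vcore pointers and clears its napping flag; secondary threads
 * (ptid != 0) are then kicked, presumably via the native XICS ICP,
 * when CONFIG_PPC_ICP_NATIVE and SMP are enabled. kvmppc_wait_for_nap
 * spins with a bounded count until the other threads of the core have
 * napped, and on_primary_thread checks that we are on thread 0.
 */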
static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
        struct paca_struct *tpaca;

        if (vcpu->arch.timer_running) {
                vcpu->arch.timer_running = 0;
        tpaca->kvm_hstate.kvm_vcpu = vcpu;
        tpaca->kvm_hstate.kvm_vcore = vc;
        tpaca->kvm_hstate.napping = 0;
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
        if (vcpu->arch.ptid) {
                kvmppc_grab_hwthread(cpu);

static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
                if (++i >= 1000000) {
                        pr_err("kvmppc_wait_for_nap timeout %d %d\n",

static int on_primary_thread(void)
        int thr = cpu_thread_in_core(cpu);
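/*
 * kvmppc_run_core runs all runnable vcpus of a virtual core in the
 * guest at once: pending signals and VPA updates are dealt with first
 * (dropping the vcore lock for the update), thread ids are assigned,
 * stolen time is accounted, each vcpu is started on a hardware thread
 * and given a DTL entry, and unused threads of the core are grabbed.
 * After the guest exits, the hardware threads are released, the other
 * threads' naps are waited for, and each vcpu's exit is handled via
 * kvmppc_handle_exit or its cede/decrementer state re-armed before
 * finished vcpus are removed from the runnable list.
 */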
        struct kvm_vcpu *vcpu, *vcpu0, *vnext;
        int ptid, i, need_vpa_update;

                if (signal_pending(vcpu->arch.run_task))
                need_vpa_update |= vcpu->arch.vpa.update_pending |
                        vcpu->arch.slb_shadow.update_pending |
                        vcpu->arch.dtl.update_pending;

        if (need_vpa_update) {
                spin_unlock(&vc->lock);
                        kvmppc_update_vpas(vcpu);
                spin_lock(&vc->lock);

                if (!vcpu->arch.ceded) {
                        vcpu->arch.ptid = ptid++;
                        vcpu->arch.ptid = ptid++;

        vc->stolen_tb += mftb() - vc->preempt_tb;
                kvmppc_start_thread(vcpu);
                kvmppc_create_dtl_entry(vcpu, vc);
                kvmppc_grab_hwthread(vc->pcpu + i);

        spin_unlock(&vc->lock);
                kvmppc_release_hwthread(vc->pcpu + i);
        spin_lock(&vc->lock);

        if (vc->nap_count < vc->n_woken)
                kvmppc_wait_for_nap(vc);
        spin_unlock(&vc->lock);

                if (now < vcpu->arch.dec_expires &&
                        ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
                                                 vcpu->arch.run_task);
                vcpu->arch.ret = ret;

                if (vcpu->arch.ceded) {
                                kvmppc_end_cede(vcpu);
                                kvmppc_set_timer(vcpu);

        spin_lock(&vc->lock);
        vc->preempt_tb = mftb();
                        kvmppc_remove_runnable(vc, vcpu);
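/*
 * kvmppc_vcore_blocked: every runnable vcpu has ceded, so drop the
 * vcore lock and sleep until one of them is no longer ceded or has a
 * pending exception.
 */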
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
        spin_unlock(&vc->lock);
                if (!v->arch.ceded || v->arch.pending_exceptions) {
        spin_lock(&vc->lock);
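/*
 * kvmppc_run_vcpu makes this vcpu runnable on its virtual core and
 * either lets an already-running core pick it up or drives
 * kvmppc_run_core / kvmppc_vcore_blocked itself, dropping the vcore
 * lock while the core is in the guest. A pending signal ends the run
 * with -EINTR and bumps signal_exits; the result is returned from
 * vcpu->arch.ret.
 */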
        vcpu->arch.trap = 0;

        vc = vcpu->arch.vcore;
        spin_lock(&vc->lock);
        vcpu->arch.ceded = 0;
        vcpu->arch.kvm_run = kvm_run;
        prev_state = vcpu->arch.state;

                        kvmppc_start_thread(vcpu);

                spin_unlock(&vc->lock);
                spin_lock(&vc->lock);
                        n_ceded += v->arch.ceded;
                if (n_ceded == vc->n_runnable)
                        kvmppc_vcore_blocked(vc);
                        kvmppc_run_core(vc);

                if (signal_pending(v->arch.run_task)) {
                        kvmppc_remove_runnable(vc, v);
                        v->stat.signal_exits++;
                        v->arch.ret = -EINTR;

        if (signal_pending(current)) {
                    vc->vcore_state == VCORE_EXITING) {
                        spin_unlock(&vc->lock);
                        spin_lock(&vc->lock);
                kvmppc_remove_runnable(vc, vcpu);
                vcpu->stat.signal_exits++;

        spin_unlock(&vc->lock);
        return vcpu->arch.ret;
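/*
 * Top-level KVM_RUN entry for HV guests: check vcpu sanity, bail out
 * on a pending signal, lazily set up the HPT and RMA on first run,
 * flush FP/Altivec state from the current thread, point the wait
 * queue at the vcore's, and loop in kvmppc_run_vcpu, re-entering the
 * guest after hypercalls that were handled in the kernel (only when
 * the guest was not in problem state, MSR_PR).
 */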
        if (!vcpu->arch.sane) {

        if (signal_pending(current)) {

        if (!vcpu->kvm->arch.rma_setup_done) {
                r = kvmppc_hv_setup_htab_rma(vcpu);

        flush_altivec_to_thread(current);
        vcpu->arch.wqp = &vcpu->arch.vcore->wq;

                r = kvmppc_run_vcpu(run, vcpu);

                    !(vcpu->arch.shregs.msr & MSR_PR)) {
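/*
 * Real Mode Area support: lpcr_rmls maps an RMA size onto an RMLS
 * field value, and the kvm_rma file operations let userspace mmap a
 * preallocated RMA, faulting its pages in on demand and releasing the
 * area when the file is closed.
 */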
static inline int lpcr_rmls(unsigned long rma_size)

static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        if (vmf->pgoff >= ri->npages)
                return VM_FAULT_SIGBUS;

static const struct vm_operations_struct kvm_rma_vm_ops = {
        .fault = kvm_rma_fault,

        vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
        vma->vm_ops = &kvm_rma_vm_ops;

static int kvm_rma_release(struct inode *inode, struct file *filp)

        .mmap = kvm_rma_mmap,
        .release = kvm_rma_release,
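/*
 * MMU capability reporting: each supported base page size (4K, 64K,
 * 16M) is advertised with its SLB and PTE encodings taken from the
 * host's mmu_psize_defs, together with the 1T segment flag when the
 * host MMU supports it.
 */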
        (*sps)->page_shift = def->shift;
        (*sps)->slb_enc = def->sllp;
        (*sps)->enc[0].page_shift = def->shift;
        (*sps)->enc[0].pte_enc = def->penc;

        if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
        sps = &info->sps[0];
        kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
        kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
        kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);

        n = kvm_dirty_bitmap_bytes(memslot);
static unsigned long slb_pgsize_encoding(unsigned long psize)
        unsigned long senc = 0;

        if (psize > 0x1000) {
                if (psize == 0x10000)

        unsigned long *phys;

        phys = kvm->arch.slot_phys[mem->slot];
        if (!kvm->arch.using_mmu_notifiers && !phys) {
                phys = vzalloc(npages * sizeof(unsigned long));
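/*
 * unpin_slot (used when MMU notifiers are not in use): walk the
 * per-slot array of pinned physical addresses under slot_phys_lock,
 * release the pinned pages and then free the array.
 */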
static void unpin_slot(struct kvm *kvm, int slot_id)
        unsigned long *physp;

        physp = kvm->arch.slot_phys[slot_id];
        npages = kvm->arch.slot_npages[slot_id];
        spin_lock(&kvm->arch.slot_phys_lock);
        for (j = 0; j < npages; j++) {
        kvm->arch.slot_phys[slot_id] = NULL;
        spin_unlock(&kvm->arch.slot_phys_lock);
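/*
 * One-time setup of the hashed page table and real-mode area, done on
 * first vcpu run. With no explicit RMA the VRMA is used and
 * LPCR_VRMASD is derived from the backing page size; with an RMA its
 * size is clamped to the memslot, converted to an RMLS value and
 * programmed into HID4 (PPC970) or LPCR (POWER7), and the RMA pages
 * are recorded in slot_phys. rma_setup_done is set at the end so this
 * only happens once.
 */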
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
        struct kvm *kvm = vcpu->kvm;
        unsigned long lpcr, senc;
        unsigned long psize, porder;
        unsigned long rma_size;
        unsigned long *physp;
        unsigned long i, npages;

        if (kvm->arch.rma_setup_done)

        if (!kvm->arch.hpt_virt) {
                        pr_err("KVM: Couldn't alloc HPT\n");

        porder = __ilog2(psize);
        ri = vma->vm_file->private_data;

                pr_err("KVM: CPU requires an RMO\n");

                if (!(psize == 0x1000 || psize == 0x10000 ||
                      psize == 0x1000000))

                senc = slb_pgsize_encoding(psize);
                lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
                lpcr |= senc << (LPCR_VRMASD_SH - 4);
                kvm->arch.lpcr = lpcr;

                if (rma_size > memslot->npages)
                        rma_size = memslot->npages;
                rmls = lpcr_rmls(rma_size);
                        pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);

                lpcr = kvm->arch.lpcr;
                        lpcr &= ~((1ul << HID4_RMLS0_SH) |
                                  (3ul << HID4_RMLS2_SH));
                        lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) |
                                ((rmls & 3) << HID4_RMLS2_SH);
                        lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
                        lpcr |= rmls << LPCR_RMLS_SH;
                kvm->arch.lpcr = lpcr;
                pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",

                porder = __ilog2(npages);
                physp = kvm->arch.slot_phys[memslot->id];
                spin_lock(&kvm->arch.slot_phys_lock);
                for (i = 0; i < npages; ++i)
                spin_unlock(&kvm->arch.slot_phys_lock);

        kvm->arch.rma_setup_done = 1;
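/*
 * VM initialisation: allocate an LPID and build the partition LPCR.
 * On PPC970 the host value lives in HID4 and the LPID is split across
 * the LPID1/LPID5 fields; on POWER7 the host LPCR is taken as a base,
 * keeping only the PECE and LPES bits and adding DPFD, HDICE and the
 * VPM bits for virtual partition memory mode.
 */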
        unsigned long lpcr, lpid;

        kvm->arch.lpid = lpid;
        INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);

                kvm->arch.host_lpid = 0;
                kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
                lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
                lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
                        ((lpid & 0xf) << HID4_LPID5_SH);
                kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
                lpcr &= LPCR_PECE | LPCR_LPES;
                lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
                        LPCR_VPM0 | LPCR_VPM1;
        kvm->arch.lpcr = lpcr;
        if (!kvm->arch.using_mmu_notifiers)

        if (kvm->arch.rma) {

        WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));

                           unsigned int inst, int *advance)

static int kvmppc_book3s_hv_init(void)

static void kvmppc_book3s_hv_exit(void)