#if PTTYPE == 64
	#define pt_element_t u64
	#define guest_walker guest_walker64
	#define FNAME(name) paging##64_##name
	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
	#define PT_LEVEL_BITS PT64_LEVEL_BITS
	#ifdef CONFIG_X86_64
	#define PT_MAX_FULL_LEVELS 4
	#define CMPXCHG cmpxchg
	#else
	#define CMPXCHG cmpxchg64
	#define PT_MAX_FULL_LEVELS 2
	#endif
#elif PTTYPE == 32
	#define pt_element_t u32
	#define guest_walker guest_walker32
	#define FNAME(name) paging##32_##name
	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
	#define PT_LEVEL_BITS PT32_LEVEL_BITS
	#define PT_MAX_FULL_LEVELS 2
	#define CMPXCHG cmpxchg
#else
	#error Invalid PTTYPE value
#endif
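/*
 * This header is a template: it is included once per guest paging mode with a
 * different PTTYPE, and the FNAME() macro pastes that mode into every function
 * name.  FNAME(update_pte), for example, expands to paging64_update_pte when
 * PTTYPE == 64 and to paging32_update_pte when PTTYPE == 32, so the same code
 * builds one walker per guest page-table format.
 */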
#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)
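/*
 * Member of struct guest_walker: the walker records the guest PTE it read at
 * each level so later code can detect modification and update A/D bits.
 */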
	pt_element_t ptes[PT_MAX_FULL_LEVELS];
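/* FNAME(gpte_to_gfn_lvl): extract the frame number a guest PTE points to. */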
	return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
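/*
 * FNAME(cmpxchg_gpte): update a guest PTE with an atomic compare-and-swap so
 * that a racing guest write is not overwritten.  A non-zero return means the
 * PTE changed underneath us and the caller must retry its walk.
 */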
			       pt_element_t orig_pte, pt_element_t new_pte)
	ret = CMPXCHG(&table[index], orig_pte, new_pte);
	return (ret != orig_pte);
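/*
 * FNAME(update_accessed_dirty_bits): walk back over the levels recorded in the
 * guest_walker and set the accessed bit in each guest PTE, plus the dirty bit
 * in the last-level PTE on a write fault, using cmpxchg_gpte above.
 */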
static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
	pt_element_t pte, orig_pte;

	for (level = walker->max_level; level >= walker->level; --level) {
		pte = orig_pte = walker->ptes[level - 1];
		table_gfn = walker->table_gfn[level - 1];
		ptep_user = walker->ptep_user[level - 1];

			trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte));

		if (level == walker->level && write_fault && !is_dirty_gpte(pte)) {
			trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
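/*
 * FNAME(walk_addr_generic): the guest page-table walker.  It translates a
 * guest virtual address into a guest frame number by reading the guest page
 * tables level by level, accumulating access permissions and accessed/dirty
 * state as it goes, and fills in the guest_walker on success or the fault
 * information on failure.
 */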
	unsigned index, pt_access, pte_access, accessed_dirty, shift;

	trace_kvm_mmu_pagetable_walk(addr, access);

		pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
		trace_kvm_mmu_paging_element(pte, walker->level);
		if (!is_present_gpte(pte))

	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||

	pt_access = pte_access = ACC_ALL;
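	/*
	 * Main walk loop: one iteration per guest paging level.  Each pass
	 * reads the PTE for this level from guest memory, checks that it is
	 * present, and narrows the accumulated access rights (pt_access,
	 * pte_access) and the accessed_dirty mask by what this PTE allows.
	 */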
		unsigned long host_addr;

		pt_access &= pte_access;

		index = PT_INDEX(addr, walker->level);

		offset = index * sizeof(pt_element_t);
		pte_gpa = gfn_to_gpa(table_gfn) + offset;

					      PFERR_USER_MASK|PFERR_WRITE_MASK);
		real_gfn = gpa_to_gfn(real_gfn);

		if (unlikely(kvm_is_error_hva(host_addr)))

		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);

		trace_kvm_mmu_paging_element(pte, walker->level);

		if (unlikely(!is_present_gpte(pte)))

		accessed_dirty &= pte;
		pte_access = pt_access & gpte_access(vcpu, pte);
	} while (!is_last_gpte(mmu, walker->level, pte));
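	/*
	 * After the loop: compute the final gfn (pse36_gfn_delta() handles the
	 * PSE-36 extension for 32-bit 4MB pages), strip write permission from
	 * a clean PTE via protect_clean_gpte(), and fold the dirty bit into
	 * accessed_dirty on write faults.  If a needed accessed/dirty bit is
	 * still clear, FNAME(update_accessed_dirty_bits) sets it in the guest
	 * page tables.
	 */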
		gfn += pse36_gfn_delta(pte);

		protect_clean_gpte(&pte_access, pte);

	shift = write_fault >> ilog2(PFERR_WRITE_MASK);
	accessed_dirty &= pte >> shift;

		ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);

	walker->pt_access = pt_access;
	walker->pte_access = pte_access;
	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
		 __func__, (u64)pte, pte_access, pt_access);
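	/*
	 * Error exit: build the page-fault error code from the failed access
	 * and record it in walker->fault for the caller to inject;
	 * nested_page_fault notes whether the walk used an MMU other than the
	 * vcpu's walk_mmu.
	 */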
	errcode |= write_fault | user_fault;
	if (fetch_fault && (mmu->nx ||

	walker->fault.error_code_valid = true;
	walker->fault.error_code = errcode;
	walker->fault.address = addr;
	walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;

	trace_kvm_mmu_walker_error(walker->fault.error_code);
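/*
 * FNAME(walk_addr) and FNAME(walk_addr_nested): thin wrappers that run the
 * generic walker against the vcpu's normal MMU or its nested MMU.
 */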
	return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.mmu, addr,
	return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu,
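/*
 * FNAME(prefetch_invalid_gpte): returns true and drops the shadow PTE if the
 * guest PTE should not be shadowed (not present or accessed bit clear).
 */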
static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
	if (!is_present_gpte(gpte))
	if (!(gpte & PT_ACCESSED_MASK))
	drop_spte(vcpu->kvm, spte);
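/*
 * FNAME(update_pte): refresh a single shadow PTE from a newly written guest
 * PTE value, re-deriving the access rights and mapping the new frame.
 */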
			      u64 *spte, const void *pte)
	gpte = *(const pt_element_t *)pte;
	if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))

	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
	pte_access = sp->role.access & gpte_access(vcpu, gpte);
	protect_clean_gpte(&pte_access, gpte);
	if (mmu_invalid_pfn(pfn))

	mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
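/*
 * FNAME(gpte_changed): re-read the guest PTE backing one level of a previous
 * walk and report whether it no longer matches the value the walker cached in
 * gw->ptes[], or whether the read itself failed.
 */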
	pt_element_t curr_pte;

	base_gpa = pte_gpa & ~mask;
	index = (pte_gpa - base_gpa) / sizeof(pt_element_t);

				       &curr_pte, sizeof(curr_pte));
	return r || curr_pte != gw->ptes[level - 1];
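/*
 * FNAME(pte_prefetch): opportunistically map the guest PTEs that sit next to
 * the faulting one in the same last-level page table, to save future faults.
 */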
	sp = page_header(__pa(sptep));

		return __direct_pte_prefetch(vcpu, sp, sptep);
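	/*
	 * Prefetch loop body: skip sptes that are already present, validate
	 * each neighbouring guest PTE, and install it with mmu_set_spte() if
	 * its pfn can be resolved.
	 */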
		if (is_shadow_present_pte(*spte))

		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))

		pte_access = sp->role.access & gpte_access(vcpu, gpte);
		protect_clean_gpte(&pte_access, gpte);
		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
		if (mmu_invalid_pfn(pfn))

		mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
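/*
 * FNAME(fetch): build the shadow page-table path for a walked translation.
 * The first loop mirrors the guest levels, allocating or reusing an indirect
 * shadow page per level; the second loop fills any remaining lower levels
 * with direct shadow pages (for example when a guest large page is shadowed
 * with small pages); the final step installs the leaf spte.
 */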
			 int user_fault, int write_fault, int hlevel,
	unsigned direct_access;

	if (!is_present_gpte(gw->ptes[gw->level - 1]))

	top_level = vcpu->arch.mmu.root_level;

	if (FNAME(gpte_changed)(vcpu, gw, top_level))
		goto out_gpte_changed;
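	/*
	 * First loop: descend the shadow hierarchy alongside the guest walk,
	 * creating an indirect shadow page for each guest level that is not
	 * yet shadowed and bailing out if the guest PTE for that level
	 * changed since the walk.
	 */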
	for (shadow_walk_init(&it, vcpu, addr);
	     shadow_walk_okay(&it) && it.level > gw->level;
	     shadow_walk_next(&it)) {

		clear_sp_write_flooding_count(it.sptep);
		drop_large_spte(vcpu, it.sptep);

		if (!is_shadow_present_pte(*it.sptep)) {
			table_gfn = gw->table_gfn[it.level - 2];
			sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
					      false, access, it.sptep);

		if (FNAME(gpte_changed)(vcpu, gw, it.level - 1))
			goto out_gpte_changed;

			link_shadow_page(it.sptep, sp);
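	/*
	 * Second loop: below the guest's last level (e.g. when a guest huge
	 * page is mapped with 4K shadow pages), keep descending with direct
	 * shadow pages that no guest table backs.
	 */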
	     shadow_walk_okay(&it) && it.level > hlevel;
	     shadow_walk_next(&it)) {

		clear_sp_write_flooding_count(it.sptep);
		validate_direct_spte(vcpu, it.sptep, direct_access);

		drop_large_spte(vcpu, it.sptep);

		if (is_shadow_present_pte(*it.sptep))

		sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
				      true, direct_access, it.sptep);
		link_shadow_page(it.sptep, sp);
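	/*
	 * Leaf level: write the final spte with the permissions gathered by
	 * the walk, then prefetch the neighbouring guest PTEs.
	 */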
	clear_sp_write_flooding_count(it.sptep);
	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
		     user_fault, write_fault, emulate, it.level,
		     gw->gfn, pfn, prefault, map_writable);
	FNAME(pte_prefetch)(vcpu, gw, it.sptep);

		kvm_mmu_put_page(sp, it.sptep);
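/*
 * FNAME(page_fault): the shadow-MMU page fault handler.  It walks the guest
 * page tables; if the walk itself faults, the fault is injected back into the
 * guest, otherwise the host pfn is looked up and FNAME(fetch) installs the
 * shadow mapping under mmu_lock.
 */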
	unsigned long mmu_seq;

	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);

		return handle_mmio_page_fault(vcpu, addr, error_code,
					      mmu_is_nested(vcpu));

	r = mmu_topup_memory_caches(vcpu);

		pgprintk("%s: guest page fault\n", __func__);
			inject_page_fault(vcpu, &walker.fault);
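	/*
	 * Walk succeeded: pick the mapping level (honouring dirty logging),
	 * resolve the gfn to a host pfn (possibly asynchronously), and then,
	 * under mmu_lock and after checking mmu_notifier_seq has not moved,
	 * let FNAME(fetch) build the shadow entries.
	 */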
	force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn);
	if (!force_pt_level) {
		level = min(walker.level, mapping_level(vcpu, walker.gfn));

	mmu_seq = vcpu->kvm->mmu_notifier_seq;

	if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault,

	if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr,

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq))

	kvm_mmu_free_some_pages(vcpu);
		transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
	sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
	pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__,
		 sptep, *sptep, emulate);
	++vcpu->stat.pf_fixed;

	spin_unlock(&vcpu->kvm->mmu_lock);

	spin_unlock(&vcpu->kvm->mmu_lock);
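/*
 * FNAME(get_level1_sp_gpa): guest physical address of the guest page table
 * (or, for 32-bit guests, the quadrant of it) that a last-level shadow page
 * mirrors.
 */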
	return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t);
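/*
 * FNAME(invlpg): emulate a guest INVLPG.  Walk the shadow table for the given
 * gva, zap the leaf spte if one is mapped, and, when possible, immediately
 * re-derive it from the current guest PTE with FNAME(update_pte).
 */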
	vcpu_clear_mmio_info(vcpu, gva);

	mmu_topup_memory_caches(vcpu);

	spin_lock(&vcpu->kvm->mmu_lock);

		level = iterator.level;
		sptep = iterator.sptep;

		sp = page_header(__pa(sptep));
		if (is_last_spte(*sptep, level)) {

			pte_gpa = FNAME(get_level1_sp_gpa)(sp);
			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);

			if (mmu_page_zap_pte(vcpu->kvm, sp, sptep))

		if (!rmap_can_add(vcpu))

					  sizeof(pt_element_t)))

		FNAME(update_pte)(vcpu, sp, sptep, &gpte);

	spin_unlock(&vcpu->kvm->mmu_lock);
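/*
 * FNAME(gva_to_gpa) and FNAME(gva_to_gpa_nested): translate a guest virtual
 * address by running the walker (against the normal or nested MMU) and, on
 * success, turning walker.gfn back into a guest physical address.
 */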
		gpa = gfn_to_gpa(walker.gfn);

		gpa = gfn_to_gpa(walker.gfn);
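/*
 * FNAME(sync_page): resynchronize an unsync shadow page with its guest page
 * table.  Each guest PTE is re-read; entries whose gpte became invalid or now
 * points at a different gfn are dropped (dirtying the TLB), and the rest have
 * their sptes recomputed with set_spte().
 */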
	int i, nr_present = 0;

	first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);

		pte_gpa = first_pte_gpa + i * sizeof(pt_element_t);

					  sizeof(pt_element_t)))

		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
			vcpu->kvm->tlbs_dirty++;

		pte_access = sp->role.access;
		pte_access &= gpte_access(vcpu, gpte);
		protect_clean_gpte(&pte_access, gpte);

		if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present))

		if (gfn != sp->gfns[i]) {
			drop_spte(vcpu->kvm, &sp->spt[i]);
			vcpu->kvm->tlbs_dirty++;

		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
			 spte_to_pfn(sp->spt[i]), true, false,
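/*
 * Undefine the per-PTTYPE macros so this header can be included again with a
 * different PTTYPE.
 */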
#undef PT_BASE_ADDR_MASK
#undef PT_LVL_ADDR_MASK
#undef PT_LVL_OFFSET_MASK
#undef PT_MAX_FULL_LEVELS
#undef gpte_to_gfn_lvl