41 #include <linux/sched.h>
46 #include <linux/module.h>
54 #include <asm/pgtable.h>
55 #include <asm/tlbflush.h>
56 #include <asm/fixmap.h>
57 #include <asm/mmu_context.h>
58 #include <asm/setup.h>
59 #include <asm/paravirt.h>
61 #include <asm/linkage.h>
67 #include <asm/xen/hypercall.h>
68 #include <asm/xen/hypervisor.h>
94 #define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4)
95 static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
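/*
 * RESERVE_BRK_ARRAY carves this array out of the kernel's brk area at
 * link time, since no page allocator exists yet when it is first
 * needed.  These PTE pages back the early identity mapping built by
 * xen_map_identity_early() further down.
 */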
124 #define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
189 static bool xen_page_pinned(void *ptr)
193 return PagePinned(page);
201 trace_xen_mmu_set_domain_pte(ptep, pteval, domid);
203 mcs = xen_mc_entry(sizeof(*u));
208 u->val = pte_val_ma(pteval);
210 MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid);
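/*
 * The statements above show the usual Xen multicall pattern:
 * xen_mc_entry() claims a slot (plus argument space) in the per-CPU
 * multicall buffer, the mmu_update request is filled in, and
 * MULTI_mmu_update() turns the slot into a HYPERVISOR_mmu_update call.
 * Callers group several updates and flush them with a single
 * hypercall, roughly:
 *
 *	xen_mc_batch();
 *	... queue one or more updates ...
 *	xen_mc_issue(PARAVIRT_LAZY_MMU);
 */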
234 static void xen_extend_mmuext_op(const struct mmuext_op *op)
263 xen_extend_mmu_update(&u);
272 trace_xen_mmu_set_pmd(ptr, val);
276 if (!xen_page_pinned(ptr)) {
281 xen_set_pmd_hyper(ptr, val);
293 static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
303 u.val = pte_val_ma(pteval);
304 xen_extend_mmu_update(&u);
311 static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
313 if (!xen_batched_set_pte(ptep, pteval)) {
324 u.val = pte_val_ma(pteval);
329 static void xen_set_pte(pte_t *ptep, pte_t pteval)
331 trace_xen_mmu_set_pte(ptep, pteval);
332 __xen_set_pte(ptep, pteval);
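/*
 * PTE write path: xen_batched_set_pte() only queues the update when
 * the CPU is in lazy MMU mode; otherwise __xen_set_pte() falls back to
 * a single HYPERVISOR_mmu_update hypercall, which is cheaper than
 * letting the store fault on the read-only, pinned page table and
 * having the hypervisor emulate it.
 */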
335 static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
338 trace_xen_mmu_set_pte_at(mm, addr, ptep, pteval);
339 __xen_set_pte(ptep, pteval);
346 trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep);
355 trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte);
359 u.val = pte_val_ma(pte);
360 xen_extend_mmu_update(&u);
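/*
 * ptep_modify_prot_start/commit: the start hook samples the old PTE,
 * and the commit above queues a batched mmu_update that asks the
 * hypervisor to preserve accessed/dirty bits set in the meantime
 * (MMU_PT_UPDATE_PRESERVE_AD).
 */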
374 val = flags & ~_PAGE_PRESENT;
384 if (val & _PAGE_PRESENT) {
410 mfn &= ~IDENTITY_FRAME_BIT;
422 if (val & _PAGE_PRESENT) {
447 return pte_mfn_to_pfn(pteval);
453 return pte_mfn_to_pfn(pgd.pgd);
479 WARN_ON(pat != 0x0007010600070106ull);
507 pte = iomap_pte(pte);
510 pte = pte_pfn_to_mfn(pte);
513 return native_make_pte(pte);
519 pgd = pte_pfn_to_mfn(pgd);
520 return native_make_pgd(pgd);
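/*
 * Under Xen PV the page tables hold machine frame numbers, not the
 * guest's pseudo-physical frames.  The pte/pmd/pud/pgd val and make
 * hooks translate between the two views (pte_pfn_to_mfn() and
 * pte_mfn_to_pfn()) using the p2m and m2p tables; the PAT WARN_ON
 * above documents the PAT layout the conversion code expects for
 * write-combining/iomap entries.
 */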
526 return pte_mfn_to_pfn(pmd.pmd);
530 static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
541 xen_extend_mmu_update(&u);
548 static void xen_set_pud(pud_t *ptr, pud_t val)
550 trace_xen_mmu_set_pud(ptr, val);
554 if (!xen_page_pinned(ptr)) {
559 xen_set_pud_hyper(ptr, val);
562 #ifdef CONFIG_X86_PAE
563 static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
565 trace_xen_mmu_set_pte_atomic(ptep, pte);
566 set_64bit((u64 *)ptep, native_pte_val(pte));
569 static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
571 trace_xen_mmu_pte_clear(mm, addr, ptep);
572 if (!xen_batched_set_pte(ptep, native_make_pte(0)))
573 native_pte_clear(mm, addr, ptep);
576 static void xen_pmd_clear(pmd_t *pmdp)
578 trace_xen_mmu_pmd_clear(pmdp);
585 pmd = pte_pfn_to_mfn(pmd);
586 return native_make_pmd(pmd);
590 #if PAGETABLE_LEVELS == 4
593 return pte_mfn_to_pfn(pud.pud);
599 pud = pte_pfn_to_mfn(pud);
601 return native_make_pud(pud);
621 static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
627 xen_extend_mmu_update(&u);
643 __xen_set_pgd_hyper(ptr, val);
650 static void xen_set_pgd(pgd_t *ptr, pgd_t val)
652 pgd_t *user_ptr = xen_get_user_pgd(ptr);
654 trace_xen_mmu_set_pgd(ptr, user_ptr, val);
658 if (!xen_page_pinned(ptr)) {
661 WARN_ON(xen_page_pinned(user_ptr));
671 __xen_set_pgd_hyper(ptr, val);
673 __xen_set_pgd_hyper(user_ptr, val);
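/*
 * For pmd/pud/pgd writes the fast path is a plain store: if the page
 * holding the entry is not pinned, the hypervisor does not yet treat
 * it as a page table.  Only pinned tables must be updated through the
 * *_hyper variants, which queue mmu_update requests as above.
 */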
700 unsigned hole_low, hole_high;
701 unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
702 unsigned pgdidx, pudidx, pmdidx;
731 for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
734 if (pgdidx >= hole_low && pgdidx < hole_high)
748 if (pgdidx == pgdidx_limit &&
749 pudidx > pudidx_limit)
763 if (pgdidx == pgdidx_limit &&
764 pudidx == pudidx_limit &&
765 pmdidx > pmdidx_limit)
785 static int xen_pgd_walk(struct mm_struct *mm,
790 return __xen_pgd_walk(mm, mm->pgd, func, limit);
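/*
 * __xen_pgd_walk() visits every page-table page of the tree below
 * 'limit' (USER_LIMIT clips it to the user address range) and calls
 * 'func' on each page; it is the workhorse for the pinning and
 * unpinning code below.
 */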
799 #if USE_SPLIT_PTLOCKS
800 ptl = __pte_lockptr(page);
807 static void xen_pte_unlock(void *v)
813 static void xen_do_pin(unsigned level, unsigned long pfn)
820 xen_extend_mmuext_op(&op);
826 unsigned pgfl = TestSetPagePinned(page);
831 else if (PageHighMem(page))
836 void *pt = lowmem_page_address(page);
865 ptl = xen_pte_lock(page, mm);
867 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
888 trace_xen_mmu_pgd_pin(mm, pgd);
892 if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
903 pgd_t *user_pgd = xen_get_user_pgd(pgd);
914 #ifdef CONFIG_X86_PAE
924 static void xen_pgd_pin(struct mm_struct *mm)
926 __xen_pgd_pin(mm, mm->pgd);
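/*
 * Pinning in outline: xen_pin_page() remaps each page-table page
 * read-only via an update_va_mapping multicall (taking the PTE lock on
 * leaf pages to avoid racing updates), and __xen_pgd_pin() finishes
 * with xen_do_pin(MMUEXT_PIN_L*_TABLE, ...) so the hypervisor
 * validates and pins the whole tree.  A pinned pagetable may be loaded
 * into cr3, but from then on every update must go through hypercalls.
 */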
946 if (!PagePinned(page)) {
948 SetPageSavePinned(page);
960 static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
967 static void __init xen_mark_init_mm_pinned(void)
972 static int xen_unpin_page(struct mm_struct *mm, struct page *page,
975 unsigned pgfl = TestClearPagePinned(page);
977 if (pgfl && !PageHighMem(page)) {
978 void *pt = lowmem_page_address(page);
991 ptl = xen_pte_lock(page, mm);
999 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
1015 trace_xen_mmu_pgd_unpin(mm, pgd);
1021 #ifdef CONFIG_X86_64
1023 pgd_t *user_pgd = xen_get_user_pgd(pgd);
1033 #ifdef CONFIG_X86_PAE
1039 __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
1044 static void xen_pgd_unpin(struct mm_struct *mm)
1046 __xen_pgd_unpin(mm, mm->pgd);
1060 if (PageSavePinned(page)) {
1061 BUG_ON(!PagePinned(page));
1063 ClearPageSavePinned(page);
1088 static void drop_other_mm_ref(void *info)
1095 if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
1104 static void xen_drop_mm_ref(struct mm_struct *mm)
1109 if (current->active_mm == mm) {
1126 cpumask_copy(mask, mm_cpumask(mm));
1135 cpumask_set_cpu(cpu, mask);
1138 if (!cpumask_empty(mask))
1140 free_cpumask_var(mask);
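/*
 * Before a pagetable can be unpinned and freed, no CPU may still have
 * it loaded (or lazily referenced) in cr3.  xen_drop_mm_ref() sends a
 * cross-CPU call to every CPU whose TLB state or mm_cpumask still
 * points at this mm so that it switches away first.
 */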
1143 static void xen_drop_mm_ref(struct mm_struct *mm)
1164 static void xen_exit_mmap(struct mm_struct *mm)
1167 xen_drop_mm_ref(mm);
1173 if (xen_page_pinned(mm->pgd))
1179 static void xen_post_allocator_init(void);
1195 #ifdef CONFIG_X86_64
1196 static void __init xen_cleanhighmap(unsigned long vaddr,
1197 unsigned long vaddr_end)
1208 if (vaddr < (unsigned long) _text || vaddr > kernel_end)
1216 static void __init xen_pagetable_init(void)
1218 #ifdef CONFIG_X86_64
1224 #ifdef CONFIG_X86_64
1226 unsigned long new_mfn_list;
1244 xen_cleanhighmap(addr, addr + size);
1264 xen_cleanhighmap(addr, addr + size);
1274 xen_post_allocator_init();
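/*
 * On 64-bit, xen_pagetable_init() is also where the boot-time
 * structures handed over by the hypervisor (the initial p2m list, in
 * particular) are reclaimed and the leftover high mappings are torn
 * down with xen_cleanhighmap(), before xen_post_allocator_init()
 * installs the final MMU hooks.
 */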
1276 static void xen_write_cr2(unsigned long cr2)
1281 static unsigned long xen_read_cr2(void)
1296 trace_xen_mmu_flush_tlb_all(0);
1300 mcs = xen_mc_entry(sizeof(*op));
1310 static void xen_flush_tlb(void)
1315 trace_xen_mmu_flush_tlb(0);
1319 mcs = xen_mc_entry(sizeof(*op));
1330 static void xen_flush_tlb_single(unsigned long addr)
1335 trace_xen_mmu_flush_tlb_single(addr);
1339 mcs = xen_mc_entry(sizeof(*op));
1350 static void xen_flush_tlb_others(const struct cpumask *cpus,
1351 struct mm_struct *mm, unsigned long start,
1364 trace_xen_mmu_flush_tlb_others(cpus, mm, start, end);
1366 if (cpumask_empty(cpus))
1369 mcs = xen_mc_entry(sizeof(*args));
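/*
 * TLB maintenance is also routed through the hypervisor: each flush
 * builds an mmuext_op (MMUEXT_TLB_FLUSH_LOCAL, MMUEXT_INVLPG_LOCAL, or
 * the *_MULTI variants with a vCPU mask for flush_tlb_others) inside a
 * multicall, so remote flushes need no IPIs from the guest.
 */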
1388 static unsigned long xen_read_cr3(void)
1393 static void set_current_cr3(void *v)
1398 static void __xen_write_cr3(bool kernel, unsigned long cr3)
1403 trace_xen_mmu_write_cr3(kernel, cr3);
1415 xen_extend_mmuext_op(&op);
1426 static void xen_write_cr3(unsigned long cr3)
1436 __xen_write_cr3(true, cr3);
1438 #ifdef CONFIG_X86_64
1440 pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
1442 __xen_write_cr3(false, __pa(user_pgd));
1444 __xen_write_cr3(false, 0);
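/*
 * 64-bit PV guests run user space on a separate pagetable, so a cr3
 * write is really two operations: __xen_write_cr3(true, ...) loads the
 * kernel base (MMUEXT_NEW_BASEPTR) and the 'false' variant loads the
 * matching user pgd (MMUEXT_NEW_USER_BASEPTR), both queued in the same
 * multicall batch.
 */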
1451 static int xen_pgd_alloc(struct mm_struct *mm)
1458 #ifdef CONFIG_X86_64
1470 if (user_pgd != NULL) {
1485 #ifdef CONFIG_X86_64
1486 pgd_t *user_pgd = xen_get_user_pgd(pgd);
1493 #ifdef CONFIG_X86_32
1497 if (pte_val_ma(*ptep) & _PAGE_PRESENT)
1506 unsigned long pfn = pte_pfn(pte);
1540 pte = mask_rw_pte(ptep, pte);
1544 native_set_pte(ptep, pte);
1547 static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1558 static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
1560 #ifdef CONFIG_FLATMEM
1568 static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
1570 #ifdef CONFIG_FLATMEM
1578 static void __init xen_release_pte_init(unsigned long pfn)
1584 static void __init xen_release_pmd_init(unsigned long pfn)
1589 static inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1602 static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
1608 MULTI_update_va_mapping(mcs.mc, (unsigned long)addr,
1614 static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
1619 trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
1624 SetPagePinned(page);
1626 if (!PageHighMem(page)) {
1643 static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
1645 xen_alloc_ptpage(mm, pfn, PT_PTE);
1648 static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
1650 xen_alloc_ptpage(mm, pfn, PT_PMD);
1654 static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
1657 bool pinned = PagePinned(page);
1659 trace_xen_mmu_release_ptpage(pfn, level, pinned);
1662 if (!PageHighMem(page)) {
1668 __set_pfn_prot(pfn, PAGE_KERNEL);
1672 ClearPagePinned(page);
1676 static void xen_release_pte(unsigned long pfn)
1678 xen_release_ptpage(pfn, PT_PTE);
1681 static void xen_release_pmd(unsigned long pfn)
1683 xen_release_ptpage(pfn, PT_PMD);
1686 #if PAGETABLE_LEVELS == 4
1687 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
1689 xen_alloc_ptpage(mm, pfn, PT_PUD);
1692 static void xen_release_pud(unsigned long pfn)
1694 xen_release_ptpage(pfn, PT_PUD);
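/*
 * Once the pagetables are pinned, any page the kernel turns into a
 * PTE/PMD/PUD page goes through xen_alloc_ptpage(): it is marked
 * pinned, remapped read-only and pinned with MMUEXT_PIN_*_TABLE (for
 * lowmem pages).  xen_release_ptpage() reverses this before the page
 * is handed back to the allocator as ordinary memory.
 */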
1700 #ifdef CONFIG_X86_32
1717 #ifdef CONFIG_X86_64
1738 return __ka(m2p(maddr));
1742 static void set_page_prot(void *addr, pgprot_t prot)
1750 #ifdef CONFIG_X86_32
1753 unsigned pmdidx, pteidx;
1762 for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
1767 pte_page = m2v(pmd[pmdidx].pmd);
1770 if (ident_pte == LEVEL1_IDENT_ENTRIES)
1773 pte_page = &level1_ident_pgt[ident_pte];
1780 for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
1783 #ifdef CONFIG_X86_32
1792 pte_page[pteidx] = pte;
1796 for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
1812 #ifdef CONFIG_X86_32
1818 #ifdef CONFIG_X86_64
1819 static void convert_pfn_mfn(void *v)
1827 pte[i] = xen_make_pte(pte[i].pte);
1829 static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
1833 set_page_prot((void *)addr, PAGE_KERNEL);
1838 set_page_prot((void *)addr, PAGE_KERNEL);
1858 unsigned long addr[3];
1859 unsigned long pt_base, pt_end;
1877 convert_pfn_mfn(init_level4_pgt);
1880 convert_pfn_mfn(level3_ident_pgt);
1883 convert_pfn_mfn(level3_kernel_pgt);
1889 addr[0] = (unsigned long)pgd;
1890 addr[1] = (unsigned long)l3;
1891 addr[2] = (unsigned long)l2;
1931 __xen_write_cr3(true, __pa(init_level4_pgt));
1941 check_pt_base(&pt_base, &pt_end, addr[i]);
1949 static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
1950 static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
1952 static void __init xen_write_cr3_init(unsigned long cr3)
1956 BUG_ON(read_cr3() != __pa(initial_page_table));
1969 swapper_kernel_pmd =
1971 copy_page(swapper_kernel_pmd, initial_kernel_pmd);
1973 __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
1982 set_page_prot(initial_page_table, PAGE_KERNEL);
1983 set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
1992 initial_kernel_pmd =
2000 copy_page(initial_kernel_pmd, kernel_pmd);
2002 xen_map_identity_early(initial_kernel_pmd, max_pfn);
2006 __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
2033 #ifdef CONFIG_X86_F00F_BUG
2036 #ifdef CONFIG_X86_32
2039 # ifdef CONFIG_HIGHMEM
2052 #ifdef CONFIG_X86_LOCAL_APIC
2058 #ifdef CONFIG_X86_IO_APIC
2059 case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END:
2068 case FIX_PARAVIRT_BOOTMAP:
2082 #ifdef CONFIG_X86_64
2093 static void __init xen_post_allocator_init(void)
2098 #if PAGETABLE_LEVELS == 4
2108 #if PAGETABLE_LEVELS == 4
2113 #ifdef CONFIG_X86_64
2116 xen_mark_init_mm_pinned();
2119 static void xen_leave_lazy_mmu(void)
2128 .read_cr2 = xen_read_cr2,
2129 .write_cr2 = xen_write_cr2,
2131 .read_cr3 = xen_read_cr3,
2132 #ifdef CONFIG_X86_32
2133 .write_cr3 = xen_write_cr3_init,
2135 .write_cr3 = xen_write_cr3,
2138 .flush_tlb_user = xen_flush_tlb,
2139 .flush_tlb_kernel = xen_flush_tlb,
2140 .flush_tlb_single = xen_flush_tlb_single,
2141 .flush_tlb_others = xen_flush_tlb_others,
2146 .pgd_alloc = xen_pgd_alloc,
2147 .pgd_free = xen_pgd_free,
2149 .alloc_pte = xen_alloc_pte_init,
2150 .release_pte = xen_release_pte_init,
2151 .alloc_pmd = xen_alloc_pmd_init,
2152 .release_pmd = xen_release_pmd_init,
2154 .set_pte = xen_set_pte_init,
2155 .set_pte_at = xen_set_pte_at,
2156 .set_pmd = xen_set_pmd_hyper,
2158 .ptep_modify_prot_start = __ptep_modify_prot_start,
2159 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
2161 .pte_val = PV_CALLEE_SAVE(xen_pte_val),
2162 .pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
2164 .make_pte = PV_CALLEE_SAVE(xen_make_pte),
2165 .make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
2167 #ifdef CONFIG_X86_PAE
2168 .set_pte_atomic = xen_set_pte_atomic,
2169 .pte_clear = xen_pte_clear,
2170 .pmd_clear = xen_pmd_clear,
2172 .set_pud = xen_set_pud_hyper,
2174 .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
2175 .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
2177 #if PAGETABLE_LEVELS == 4
2178 .pud_val = PV_CALLEE_SAVE(xen_pud_val),
2179 .make_pud = PV_CALLEE_SAVE(xen_make_pud),
2180 .set_pgd = xen_set_pgd_hyper,
2182 .alloc_pud = xen_alloc_pmd_init,
2183 .release_pud = xen_release_pmd_init,
2186 .activate_mm = xen_activate_mm,
2187 .dup_mmap = xen_dup_mmap,
2188 .exit_mmap = xen_exit_mmap,
2192 .leave = xen_leave_lazy_mmu,
2195 .set_fixmap = xen_set_fixmap,
2200 x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
2201 x86_init.paging.pagetable_init = xen_pagetable_init;
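/*
 * The ops table above initially installs the *_init flavours of the
 * hooks (xen_alloc_pte_init, xen_set_pte_init, write_cr3_init on
 * 32-bit, ...), which are safe before the page allocator and pinning
 * are set up.  xen_post_allocator_init(), seen earlier, swaps in the
 * final versions once memory initialisation is complete.
 */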
2208 #define MAX_CONTIG_ORDER 9
2211 #define VOID_PTE (mfn_pte(0, __pgprot(0)))
2212 static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
2213 unsigned long *in_frames,
2214 unsigned long *out_frames)
2226 MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
2240 static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
2241 unsigned long *mfns,
2242 unsigned long first_mfn)
2249 limit = 1u << order;
2258 mfn = first_mfn + i;
2260 if (i < (limit - 1))
2269 MULTI_update_va_mapping(mcs.mc, vaddr,
2270 mfn_pte(mfn, PAGE_KERNEL), flags);
2286 static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
2287 unsigned long *pfns_in,
2288 unsigned long extents_out,
2289 unsigned int order_out,
2290 unsigned long *mfns_out,
2291 unsigned int address_bits)
2298 .nr_extents = extents_in,
2299 .extent_order = order_in,
2300 .extent_start = pfns_in,
2304 .nr_extents = extents_out,
2305 .extent_order = order_out,
2306 .extent_start = mfns_out,
2307 .address_bits = address_bits,
2312 BUG_ON(extents_in << order_in != extents_out << order_out);
2318 BUG_ON(success && (rc != 0));
2324 unsigned int address_bits)
2326 unsigned long *in_frames = discontig_frames, out_frame;
2327 unsigned long flags;
2347 xen_zap_pfn_range(vstart, order, in_frames, NULL);
2351 success = xen_exchange_memory(1UL << order, 0, in_frames,
2352 1, order, &out_frame,
2357 xen_remap_exchanged_ptes(vstart, order, NULL, out_frame);
2359 xen_remap_exchanged_ptes(vstart, order, in_frames, 0);
2363 return success ? 0 : -ENOMEM;
2369 unsigned long *out_frames = discontig_frames, in_frame;
2370 unsigned long flags;
2387 xen_zap_pfn_range(vstart, order, NULL, out_frames);
2390 success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
2395 xen_remap_exchanged_ptes(vstart, order, out_frames, 0);
2397 xen_remap_exchanged_ptes(vstart, order, NULL, in_frame);
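/*
 * Contiguous-region handling in outline: xen_zap_pfn_range() clears
 * the PTEs of the range and records the frames backing it,
 * xen_exchange_memory() trades those frames with the hypervisor
 * (XENMEM_exchange) for a machine-contiguous extent of the requested
 * order (or back again), and xen_remap_exchanged_ptes() rebuilds the
 * virtual mapping on the new frames.  swiotlb-xen relies on this to
 * obtain DMA-able buffers.
 */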
2403 #ifdef CONFIG_XEN_PVHVM
2404 #ifdef CONFIG_PROC_VMCORE
2415 static int xen_oldmem_pfn_is_ram(unsigned long pfn)
2441 static void xen_hvm_exit_mmap(struct mm_struct *mm)
2452 static int is_pagetable_dying_supported(void)
2469 if (is_pagetable_dying_supported())
2471 #ifdef CONFIG_PROC_VMCORE
2477 #define REMAP_BATCH_SIZE 16
2486 unsigned long addr, void *data)
2500 unsigned long mfn, int nr,
2504 struct mmu_update mmu_update[REMAP_BATCH_SIZE];
2506 unsigned long range;
2514 BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
2520 batch = min(REMAP_BATCH_SIZE, nr);
2525 remap_area_mfn_pte_fn, &rmd);