#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>
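/*
 * Page-table walkers used to tear down kernel virtual mappings: each level
 * (pte, pmd, pud, pgd) iterates over its entries in [addr, end) and descends
 * into the next level, clearing the ptes at the bottom.
 */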
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
        } while (pte++, addr += PAGE_SIZE, addr != end);

static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
                if (pmd_none_or_clear_bad(pmd))
                vunmap_pte_range(pmd, addr, next);
        } while (pmd++, addr = next, addr != end);

static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
                if (pud_none_or_clear_bad(pud))
                vunmap_pmd_range(pud, addr, next);
        } while (pud++, addr = next, addr != end);

static void vunmap_page_range(unsigned long addr, unsigned long end)
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                vunmap_pud_range(pgd, addr, next);
        } while (pgd++, addr = next, addr != end);
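/*
 * The matching walkers for setting up mappings: they install the given
 * pages[] with protection prot into the kernel page tables.  *nr is a
 * running index into the page array so callers know how many pages were
 * mapped.
 */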
static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
        pte = pte_alloc_kernel(pmd, addr);
        } while (pte++, addr += PAGE_SIZE, addr != end);

static int vmap_pmd_range(pud_t *pud, unsigned long addr,
                unsigned long end, pgprot_t prot, struct page **pages, int *nr)
                if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
        } while (pmd++, addr = next, addr != end);

static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
                unsigned long end, pgprot_t prot, struct page **pages, int *nr)
                if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
        } while (pud++, addr = next, addr != end);
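/*
 * vmap_page_range_noflush() maps pages into [start, end) without flushing
 * caches; vmap_page_range() is the flushing wrapper around it.
 */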
static int vmap_page_range_noflush(unsigned long start, unsigned long end,
        unsigned long addr = start;
                next = pgd_addr_end(addr, end);
                err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
        } while (pgd++, addr = next, addr != end);

static int vmap_page_range(unsigned long start, unsigned long end,
        ret = vmap_page_range_noflush(start, end, prot, pages);

#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
        unsigned long addr = (unsigned long)x;
        return is_vmalloc_addr(x);

        unsigned long addr = (unsigned long) vmalloc_addr;
        struct page *page = NULL;
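/*
 * Per-vmap_area state: VM_LAZY_FREE/VM_LAZY_FREEING track areas queued for
 * lazy (batched) unmapping, VM_VM_AREA marks areas backed by a vm_struct.
 * free_vmap_cache and the cached_* variables remember where the last search
 * ended so alloc_vmap_area() does not rescan the rbtree from scratch.
 */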
#define VM_LAZY_FREE 0x01
#define VM_LAZY_FREEING 0x02
#define VM_VM_AREA 0x04

static struct rb_node *free_vmap_cache;
static unsigned long cached_hole_size;
static unsigned long cached_vstart;
static unsigned long cached_align;

static unsigned long vmap_area_pcpu_hole;

static struct vmap_area *__find_vmap_area(unsigned long addr)

static void __insert_vmap_area(struct vmap_area *va)
        rb_link_node(&va->rb_node, parent, p);
                list_add_rcu(&va->list, &prev->list);
                list_add_rcu(&va->list, &vmap_area_list);

static void purge_vmap_area_lazy(void);
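/*
 * alloc_vmap_area() below carves a free region of the requested size and
 * alignment out of [vstart, vend).  It first consults the free_vmap_cache
 * hint, falls back to an rbtree walk, and on failure purges lazily freed
 * areas once before giving up with -EBUSY.
 */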
        va = kmalloc_node(sizeof(struct vmap_area),
        spin_lock(&vmap_area_lock);
        if (!free_vmap_cache ||
                        size < cached_hole_size ||
                        vstart < cached_vstart ||
                        align < cached_align) {
                cached_hole_size = 0;
                free_vmap_cache = NULL;
        cached_align = align;

        if (free_vmap_cache) {
                if (addr + size - 1 < addr)
                addr = ALIGN(vstart, align);
                if (addr + size - 1 < addr)
                        if (tmp->va_end >= addr) {

        while (addr + size > first->va_start && addr + size <= vend) {
                if (addr + cached_hole_size < first->va_start)
                if (addr + size - 1 < addr)
                if (list_is_last(&first->list, &vmap_area_list))

        if (addr + size > vend)
        __insert_vmap_area(va);
        free_vmap_cache = &va->rb_node;
        spin_unlock(&vmap_area_lock);

        spin_unlock(&vmap_area_lock);
                purge_vmap_area_lazy();
        if (printk_ratelimit())
                        "vmap allocation for size %lu failed: "
                        "use vmalloc=<size> to increase size.\n", size);
        return ERR_PTR(-EBUSY);
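/*
 * __free_vmap_area() unlinks a vmap_area from the rbtree and the RCU list.
 * It may invalidate free_vmap_cache when the freed range lies below the
 * cached window, and keeps vmap_area_pcpu_hole up to date for the percpu
 * allocator.  Callers must hold vmap_area_lock.
 */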
static void __free_vmap_area(struct vmap_area *va)
        if (free_vmap_cache) {
                if (va->va_end < cached_vstart) {
                        free_vmap_cache = NULL;
        list_del_rcu(&va->list);
                vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);

static void free_vmap_area(struct vmap_area *va)
        spin_lock(&vmap_area_lock);
        __free_vmap_area(va);
        spin_unlock(&vmap_area_lock);

static void unmap_vmap_area(struct vmap_area *va)

static void vmap_debug_free_range(unsigned long start, unsigned long end)
#ifdef CONFIG_DEBUG_PAGEALLOC
        vunmap_page_range(start, end);

static unsigned long lazy_max_pages(void)

static void purge_fragmented_blocks_allcpus(void);

        atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
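/*
 * __purge_vmap_area_lazy() walks vmap_area_list, frees every area marked
 * VM_LAZY_FREE and issues a single TLB flush for the union of their ranges.
 * 'sync' decides whether to wait on purge_lock, 'force_flush' forces the
 * flush even if nothing was purged.
 */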
static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
                                        int sync, int force_flush)
        if (!sync && !force_flush) {
                if (!spin_trylock(&purge_lock))
                spin_lock(&purge_lock);
                purge_fragmented_blocks_allcpus();
        list_for_each_entry_rcu(va, &vmap_area_list, list) {
        if (nr || force_flush)
                spin_lock(&vmap_area_lock);
                        __free_vmap_area(va);
                spin_unlock(&vmap_area_lock);
        spin_unlock(&purge_lock);

static void try_purge_vmap_area_lazy(void)
        unsigned long start = ULONG_MAX, end = 0;
        __purge_vmap_area_lazy(&start, &end, 0, 0);

static void purge_vmap_area_lazy(void)
        unsigned long start = ULONG_MAX, end = 0;
        __purge_vmap_area_lazy(&start, &end, 1, 0);

static void free_vmap_area_noflush(struct vmap_area *va)
                try_purge_vmap_area_lazy();

static void free_unmap_vmap_area_noflush(struct vmap_area *va)
        free_vmap_area_noflush(va);

static void free_unmap_vmap_area(struct vmap_area *va)
        free_unmap_vmap_area_noflush(va);

static struct vmap_area *find_vmap_area(unsigned long addr)
        spin_lock(&vmap_area_lock);
        va = __find_vmap_area(addr);
        spin_unlock(&vmap_area_lock);

static void free_unmap_vmap_area_addr(unsigned long addr)
        va = find_vmap_area(addr);
        free_unmap_vmap_area(va);
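/*
 * Sizing of the per-cpu vmap blocks used by vm_map_ram() for small
 * allocations: VMAP_BBMAP_BITS is the number of pages per block, clamped
 * between VMAP_BBMAP_BITS_MIN and VMAP_BBMAP_BITS_MAX and otherwise scaled
 * by the vmalloc space available per CPU.
 */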
#if BITS_PER_LONG == 32
#define VMALLOC_SPACE (128UL*1024*1024)
#else
#define VMALLOC_SPACE (128UL*1024*1024*1024)
#endif

#define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE)
#define VMAP_MAX_ALLOC BITS_PER_LONG
#define VMAP_BBMAP_BITS_MAX 1024
#define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2)
#define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y))
#define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y))
#define VMAP_BBMAP_BITS \
                VMAP_MIN(VMAP_BBMAP_BITS_MAX, \
                VMAP_MAX(VMAP_BBMAP_BITS_MIN, \
                        VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))

#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)

static unsigned long addr_to_vb_idx(unsigned long addr)

        unsigned long vb_idx;
        vb_idx = addr_to_vb_idx(va->va_start);
        spin_lock(&vmap_block_tree_lock);
        spin_unlock(&vmap_block_tree_lock);
        radix_tree_preload_end();
        spin_lock(&vbq->lock);
        spin_unlock(&vbq->lock);

static void free_vmap_block(struct vmap_block *vb)
        unsigned long vb_idx;
        vb_idx = addr_to_vb_idx(vb->va->va_start);
        spin_lock(&vmap_block_tree_lock);
        spin_unlock(&vmap_block_tree_lock);
        free_vmap_area_noflush(vb->va);

static void purge_fragmented_blocks(int cpu)
        list_for_each_entry_rcu(vb, &vbq->free, free_list) {
                spin_lock(&vb->lock);
                        spin_lock(&vbq->lock);
                        spin_unlock(&vbq->lock);
                        spin_unlock(&vb->lock);
                        spin_unlock(&vb->lock);

static void purge_fragmented_blocks_thiscpu(void)

static void purge_fragmented_blocks_allcpus(void)
                purge_fragmented_blocks(cpu);
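/*
 * vb_alloc() serves small (up to VMAP_MAX_ALLOC pages) vm_map_ram requests
 * from the current CPU's partially used vmap blocks, allocating a fresh
 * block with new_vmap_block() when none has enough free space.
 */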
static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
        unsigned long addr = 0;
                spin_lock(&vb->lock);
                if (vb->free < 1UL << order)
                BUG_ON(addr_to_vb_idx(addr) !=
                                addr_to_vb_idx(vb->va->va_start));
                        spin_lock(&vbq->lock);
                        spin_unlock(&vbq->lock);
                spin_unlock(&vb->lock);
                spin_unlock(&vb->lock);
                purge_fragmented_blocks_thiscpu();
                vb = new_vmap_block(gfp_mask);
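/*
 * vb_free() unmaps the range and marks it dirty in its vmap block; once a
 * block is entirely dirty, the whole block is returned via free_vmap_block().
 */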
static void vb_free(const void *addr, unsigned long size)
        unsigned long vb_idx;
        vb_idx = addr_to_vb_idx((unsigned long)addr);
        vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
        spin_lock(&vb->lock);
                spin_unlock(&vb->lock);
                free_vmap_block(vb);
                spin_unlock(&vb->lock);

        unsigned long start = ULONG_MAX, end = 0;
                        spin_lock(&vb->lock);
                        spin_unlock(&vb->lock);
        __purge_vmap_area_lazy(&start, &end, 1, flush);

        unsigned long addr = (unsigned long)mem;
        vmap_debug_free_range(addr, addr+size);
                free_unmap_vmap_area_addr(addr);

                addr = (unsigned long)mem;
        if (vmap_page_range(addr, addr + size, prot, pages) < 0) {

        BUG_ON(vmap_initialized);
        vm->addr = (void *)addr;
                INIT_LIST_HEAD(&vbq->free);
                __insert_vmap_area(va);
        vmap_initialized = true;
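/*
 * map_kernel_range_noflush()/unmap_kernel_range_noflush() are the raw
 * helpers (no TLB flush) used by callers such as the percpu allocator;
 * unmap_kernel_range() adds the TLB flush and map_vm_area() maps the pages
 * backing a vm_struct.
 */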
                             pgprot_t prot, struct page **pages)
        return vmap_page_range_noflush(addr, addr + size, prot, pages);

        vunmap_page_range(addr, addr + size);

        unsigned long end = addr + size;
        vunmap_page_range(addr, end);

        unsigned long addr = (unsigned long)area->addr;
        err = vmap_page_range(addr, end, prot, *pages);

static void insert_vmalloc_vmlist(struct vm_struct *vm)
        for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {

        setup_vmalloc_vm(vm, va, flags, caller);
        insert_vmalloc_vmlist(vm);

static struct vm_struct *__get_vm_area_node(unsigned long size,
                unsigned long align, unsigned long flags, unsigned long start,
                unsigned long end, int node, gfp_t gfp_mask, const void *caller)
                int bit = fls(size);
        va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
                setup_vmalloc_vm(area, va, flags, caller);
                insert_vmalloc_vm(area, va, flags, caller);

                                unsigned long start, unsigned long end)
        return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
                                __builtin_return_address(0));

                                unsigned long start, unsigned long end,
        return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,

                                -1, GFP_KERNEL, __builtin_return_address(0));

        va = find_vmap_area((unsigned long)addr);

        va = find_vmap_area((unsigned long)addr);
                for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
                free_unmap_vmap_area(va);
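/*
 * __vunmap() is the common backend for vfree()/vunmap(): it validates the
 * address, removes the vm area, and, when deallocate_pages is set, frees
 * each backing page and the page array itself.
 */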
static void __vunmap(const void *addr, int deallocate_pages)
        if ((PAGE_SIZE-1) & (unsigned long)addr) {
                WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
                WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
        debug_check_no_obj_freed(addr, area->size);
        if (deallocate_pages) {
                for (i = 0; i < area->nr_pages; i++) {
                        struct page *page = area->pages[i];

                unsigned long flags, pgprot_t prot)
        if (count > totalram_pages)
                                        __builtin_return_address(0));

static void *__vmalloc_node(unsigned long size, unsigned long align,
                            int node, const void *caller);
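/*
 * __vmalloc_area_node() populates a vm_struct: the pages[] array is
 * allocated first (recursively via __vmalloc_node() when it is larger than
 * a page, otherwise with kmalloc_node()), then each page is allocated
 * individually and mapped into the area.
 */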
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
                                 pgprot_t prot, int node, const void *caller)
        const int order = 0;
        struct page **pages;
        unsigned int nr_pages, array_size, i;
        array_size = (nr_pages * sizeof(struct page *));
                pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
                                PAGE_KERNEL, node, caller);
                pages = kmalloc_node(array_size, nested_gfp, node);
        for (i = 0; i < area->nr_pages; i++) {
                        page = alloc_pages_node(node, tmp_mask, order);
                          "vmalloc: allocation failure, allocated %ld of %ld bytes\n",
                        unsigned long start, unsigned long end, gfp_t gfp_mask,
                        pgprot_t prot, int node, const void *caller)
        unsigned long real_size = size;
        if (!size || (size >> PAGE_SHIFT) > totalram_pages)
                                  start, end, node, gfp_mask, caller);
        addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
        insert_vmalloc_vmlist(area);
                          "vmalloc: allocation failure: %lu bytes\n",

static void *__vmalloc_node(unsigned long size, unsigned long align,
                            int node, const void *caller)
                                gfp_mask, prot, node, caller);

        return __vmalloc_node(size, 1, gfp_mask, prot, -1,
                                __builtin_return_address(0));

static inline void *__vmalloc_node_flags(unsigned long size,
                                        int node, gfp_t flags)
        return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
                                        node, __builtin_return_address(0));

        return __vmalloc_node_flags(size, -1,

        ret = __vmalloc_node(size, SHMLBA,

                        node, __builtin_return_address(0));

        return __vmalloc_node_flags(size, node,

#ifndef PAGE_KERNEL_EXEC
# define PAGE_KERNEL_EXEC PAGE_KERNEL

                        -1, __builtin_return_address(0));
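/*
 * GFP mask used by vmalloc_32()/vmalloc_32_user() so the backing pages are
 * guaranteed to be addressable with 32 bits, picking GFP_DMA32 or GFP_DMA
 * where such zones exist.
 */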
#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
#else
#define GFP_VMALLOC32 GFP_KERNEL
#endif

                        -1, __builtin_return_address(0));

                        -1, __builtin_return_address(0));

static int aligned_vread(char *buf, char *addr, unsigned long count)
                        memcpy(buf, map + offset, length);

static int aligned_vwrite(char *buf, char *addr, unsigned long count)
                        memcpy(map + offset, buf, length);
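/*
 * vread()/vwrite() copy between a kernel buffer and vmalloc space for
 * interfaces such as /proc/kcore and /dev/kmem.  They walk vmlist, skip
 * VM_IOREMAP areas, and treat holes as zeroes (vread zero-fills them,
 * vwrite skips them).
 */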
long vread(char *buf, char *addr, unsigned long count)
        if ((unsigned long) addr + count < count)
                count = -(unsigned long) addr;
        for (tmp = vmlist; count && tmp; tmp = tmp->next) {
                vaddr = (char *) tmp->addr;
                while (addr < vaddr) {
                if (!(tmp->flags & VM_IOREMAP))
                        aligned_vread(buf, addr, n);
        if (buf == buf_start)
        if (buf != buf_start + buflen)
                memset(buf, 0, buflen - (buf - buf_start));

long vwrite(char *buf, char *addr, unsigned long count)
        if ((unsigned long) addr + count < count)
                count = -(unsigned long) addr;
        for (tmp = vmlist; count && tmp; tmp = tmp->next) {
                vaddr = (char *) tmp->addr;
                while (addr < vaddr) {
                if (!(tmp->flags & VM_IOREMAP)) {
                        aligned_vwrite(buf, addr, n);

                      unsigned long pgoff)
        if ((PAGE_SIZE-1) & (unsigned long)addr)
        } while (usize > 0);
        vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
                                         __builtin_return_address(0));

                                size, f, ptes ? &ptes : NULL)) {

static bool pvm_find_next_prev(unsigned long end,
        else if (end > va->va_end)
                *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
                *pnext = node_to_va(rb_next(&(*pprev)->rb_node));

static unsigned long pvm_determine_end(struct vmap_area **pnext,
                                       unsigned long align)
        const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
                addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
        while (*pprev && (*pprev)->va_end > addr) {
                *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
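/*
 * pcpu_get_vm_areas() allocates nr_vms vm areas whose start addresses keep
 * the given offsets relative to a common base.  It searches top-down from
 * vmap_area_pcpu_hole, using pvm_find_next_prev()/pvm_determine_end() to
 * slide the candidate base until every area fits.
 */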
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
                                     const size_t *sizes, int nr_vms,
        const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
        int area, area2, last_area, term_area;
        bool purged = false;

        for (last_area = 0, area = 0; area < nr_vms; area++) {
                start = offsets[area];
                end = start + sizes[area];
                if (start > offsets[last_area])
                for (area2 = 0; area2 < nr_vms; area2++) {
                        unsigned long start2 = offsets[area2];
                        unsigned long end2 = start2 + sizes[area2];
                        BUG_ON(start2 >= start && start2 < end);
                        BUG_ON(end2 <= end && end2 > start);
        last_end = offsets[last_area] + sizes[last_area];

        if (vmalloc_end - vmalloc_start < last_end) {

        vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL);
        vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL);
        for (area = 0; area < nr_vms; area++) {
                if (!vas[area] || !vms[area])

        spin_lock(&vmap_area_lock);
        area = term_area = last_area;
        start = offsets[area];
        end = start + sizes[area];
        if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
                base = vmalloc_end - last_end;
        base = pvm_determine_end(&next, &prev, align) - end;

                if (base + last_end < vmalloc_start + last_end) {
                        spin_unlock(&vmap_area_lock);
                                purge_vmap_area_lazy();
                if (next && next->va_start < base + end) {
                        base = pvm_determine_end(&next, &prev, align) - end;
                if (prev && prev->va_end > base + start) {
                        base = pvm_determine_end(&next, &prev, align) - end;
                area = (area + nr_vms - 1) % nr_vms;
                if (area == term_area)
                start = offsets[area];
                end = start + sizes[area];
                pvm_find_next_prev(base + end, &next, &prev);

        for (area = 0; area < nr_vms; area++) {
                va->va_start = base + offsets[area];
                __insert_vmap_area(va);
        vmap_area_pcpu_hole = base + offsets[last_area];
        spin_unlock(&vmap_area_lock);
        for (area = 0; area < nr_vms; area++)
                insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
        for (area = 0; area < nr_vms; area++) {
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
        for (i = 0; i < nr_vms; i++)

#ifdef CONFIG_PROC_FS
        while (n > 0 && v) {

static void *s_next(struct seq_file *m, void *p, loff_t *pos)

static void s_stop(struct seq_file *m, void *p)

                unsigned int nr, *counters = m->private;
                for (nr = 0; nr < v->nr_pages; nr++)
                        counters[page_to_nid(v->pages[nr])]++;

        if (v->flags & VM_IOREMAP)
        show_numa_info(m, v);

        .open = vmalloc_open,

static int __init proc_vmalloc_init(void)
        proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
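/*
 * /proc/vmallocinfo: a seq_file interface that walks vmlist and prints one
 * line per vm area, including per-node page counts when NUMA is enabled.
 */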