#include <linux/bitmap.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#define PCPU_SLOT_BASE_SHIFT		5	/* 1-31 bytes share the same slot */
#define PCPU_DFL_MAP_ALLOC		16	/* start a map with 16 entries */
#ifdef CONFIG_SMP
/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
#ifndef __addr_to_pcpu_ptr
#define __addr_to_pcpu_ptr(addr)					\
	(void __percpu *)((unsigned long)(addr) -			\
			  (unsigned long)pcpu_base_addr +		\
			  (unsigned long)__per_cpu_start)
#endif
#ifndef __pcpu_ptr_to_addr
#define __pcpu_ptr_to_addr(ptr)						\
	(void __force *)((unsigned long)(ptr) +				\
			 (unsigned long)pcpu_base_addr -		\
			 (unsigned long)__per_cpu_start)
#endif
#else	/* CONFIG_SMP */
/* on UP, it's always identity mapped */
#define __addr_to_pcpu_ptr(addr)	(void __percpu *)(addr)
#define __pcpu_ptr_to_addr(ptr)		(void __force *)(ptr)
#endif	/* CONFIG_SMP */
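/*
 * Note (added commentary, not in the original file): addresses inside a
 * chunk live relative to pcpu_base_addr, while the void __percpu *
 * cookies handed to users live relative to __per_cpu_start so that the
 * same accessors work for both static and dynamic percpu variables.
 * These two macros convert between the two spaces; e.g. pcpu_alloc()
 * hands out __addr_to_pcpu_ptr(chunk->base_addr + off) and free_percpu()
 * undoes it with __pcpu_ptr_to_addr() before searching for the chunk.
 */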
static const unsigned long *pcpu_group_offsets __read_mostly;
/* optional reserved chunk; serves reserved allocations from the first chunk */
static struct pcpu_chunk *pcpu_reserved_chunk;
static int pcpu_reserved_chunk_limit;
static bool pcpu_addr_in_first_chunk(void *addr)
{
	void *first_start = pcpu_first_chunk->base_addr;

	return addr >= first_start && addr < first_start + pcpu_unit_size;
}
static bool pcpu_addr_in_reserved_chunk(void *addr)
{
	void *first_start = pcpu_first_chunk->base_addr;

	return addr >= first_start &&
	       addr < first_start + pcpu_reserved_chunk_limit;
}
static int __pcpu_size_to_slot(int size)
{
	int highbit = fls(size);	/* size is in bytes */
	return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1);
}
static int pcpu_size_to_slot(int size)
{
	if (size == pcpu_unit_size)
		return pcpu_nr_slots - 1;
	return __pcpu_size_to_slot(size);
}
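/*
 * Worked example (added commentary, not in the original file): with
 * PCPU_SLOT_BASE_SHIFT == 5, a 12-byte area gives fls(12) == 4 and thus
 * slot max(4 - 5 + 2, 1) == 1, while a 100-byte area gives fls(100) == 7
 * and slot 4.  Chunks whose free space equals pcpu_unit_size, i.e. fully
 * free chunks, are parked in the dedicated last slot.
 */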
static int pcpu_chunk_slot(const struct pcpu_chunk *chunk)
{
	if (chunk->free_size < sizeof(int) || chunk->contig_hint < sizeof(int))
		return 0;

	return pcpu_size_to_slot(chunk->free_size);
}
static int pcpu_page_idx(unsigned int cpu, int page_idx)
{
	return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx;
}
static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
				     unsigned int cpu, int page_idx)
{
	return (unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] +
		(page_idx << PAGE_SHIFT);
}
static void pcpu_next_unpop(struct pcpu_chunk *chunk, int *rs, int *re, int end)
{
	*rs = find_next_zero_bit(chunk->populated, end, *rs);
	*re = find_next_bit(chunk->populated, end, *rs + 1);
}

static void pcpu_next_pop(struct pcpu_chunk *chunk, int *rs, int *re, int end)
{
	*rs = find_next_bit(chunk->populated, end, *rs);
	*re = find_next_zero_bit(chunk->populated, end, *rs + 1);
}
#define pcpu_for_each_unpop_region(chunk, rs, re, start, end)		    \
	for ((rs) = (start), pcpu_next_unpop((chunk), &(rs), &(re), (end)); \
	     (rs) < (re);						    \
	     (rs) = (re) + 1, pcpu_next_unpop((chunk), &(rs), &(re), (end)))

#define pcpu_for_each_pop_region(chunk, rs, re, start, end)		    \
	for ((rs) = (start), pcpu_next_pop((chunk), &(rs), &(re), (end));   \
	     (rs) < (re);						    \
	     (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end)))
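/*
 * Usage sketch (added commentary, not in the original file): walk every
 * unpopulated page range of a chunk, e.g. to find pages that still need
 * backing for a new allocation:
 *
 *	int rs, re;
 *
 *	pcpu_for_each_unpop_region(chunk, rs, re, 0, pcpu_unit_pages)
 *		pr_debug("pages [%d, %d) need backing\n", rs, re);
 *
 * Each [rs, re) is a maximal run of clear bits in chunk->populated.
 */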
static void *pcpu_mem_zalloc(size_t size)
{
	if (WARN_ON_ONCE(!slab_is_available()))
		return NULL;

	if (size <= PAGE_SIZE)
		return kzalloc(size, GFP_KERNEL);
	else
		return vzalloc(size);
}

static void pcpu_mem_free(void *ptr, size_t size)
{
	if (size <= PAGE_SIZE)
		kfree(ptr);
	else
		vfree(ptr);
}
static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
{
	int nslot = pcpu_chunk_slot(chunk);

	if (chunk != pcpu_reserved_chunk && oslot != nslot) {
		if (oslot < nslot)
			list_move(&chunk->list, &pcpu_slot[nslot]);
		else
			list_move_tail(&chunk->list, &pcpu_slot[nslot]);
	}
}
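/*
 * Note (added commentary, not in the original file): a chunk whose free
 * space grew (oslot < nslot) is moved to the head of its new slot and is
 * therefore tried first by the allocator's list walk, while one that
 * shrank goes to the tail; this keeps roomy chunks preferred and lets
 * nearly-full ones age toward the back.
 */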
static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
{
	int new_alloc;

	if (chunk->map_alloc >= chunk->map_used + 2)
		return 0;

	new_alloc = PCPU_DFL_MAP_ALLOC;
	while (new_alloc < chunk->map_used + 2)
		new_alloc *= 2;

	return new_alloc;
}
static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
{
	int *old = NULL, *new = NULL;
	size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
	unsigned long flags;

	new = pcpu_mem_zalloc(new_size);
	if (!new)
		return -ENOMEM;

	/* acquire pcpu_lock and switch to new area map */
	spin_lock_irqsave(&pcpu_lock, flags);

	if (new_alloc <= chunk->map_alloc)
		goto out_unlock;

	old_size = chunk->map_alloc * sizeof(chunk->map[0]);
	old = chunk->map;

	memcpy(new, old, old_size);

	chunk->map_alloc = new_alloc;
	chunk->map = new;
	new = NULL;

out_unlock:
	spin_unlock_irqrestore(&pcpu_lock, flags);

	/* pcpu_mem_free() might vfree() and thus can't run under pcpu_lock */
	pcpu_mem_free(old, old_size);
	pcpu_mem_free(new, new_size);

	return 0;
}
static void pcpu_split_block(struct pcpu_chunk *chunk, int i,
			     int head, int tail)
{
	int nr_extra = !!head + !!tail;

	BUG_ON(chunk->map_alloc < chunk->map_used + nr_extra);

	/* insert new subblocks */
	memmove(&chunk->map[i + nr_extra], &chunk->map[i],
		sizeof(chunk->map[0]) * (chunk->map_used - i));
	chunk->map_used += nr_extra;

	if (head) {
		chunk->map[i + 1] = chunk->map[i] - head;
		chunk->map[i++] = head;
	}
	if (tail) {
		chunk->map[i++] -= tail;
		chunk->map[i] = tail;
	}
}
static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
{
	int oslot = pcpu_chunk_slot(chunk);
	int max_contig = 0;
	int i, off;

	for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) {
		bool is_last = i + 1 == chunk->map_used;
		int head, tail;

		/* extra for alignment requirement */
		head = ALIGN(off, align) - off;
		BUG_ON(i == 0 && head != 0);

		if (chunk->map[i] < 0)
			continue;
		if (chunk->map[i] < head + size) {
			max_contig = max(chunk->map[i], max_contig);
			continue;
		}

		/*
		 * If head is small or the previous block is free,
		 * merge'em.  Note that 'small' is defined as smaller
		 * than sizeof(int).
		 */
		if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) {
			if (chunk->map[i - 1] > 0)
				chunk->map[i - 1] += head;
			else {
				chunk->map[i - 1] -= head;
				chunk->free_size -= head;
			}
			chunk->map[i] -= head;
			off += head;
			head = 0;
		}

		/* if tail is small, just keep it around */
		tail = chunk->map[i] - head - size;
		if (tail < sizeof(int))
			tail = 0;

		/* split if warranted */
		if (head || tail) {
			pcpu_split_block(chunk, i, head, tail);
			if (head) {
				i++;
				off += head;
				max_contig = max(chunk->map[i - 1], max_contig);
			}
			if (tail)
				max_contig = max(chunk->map[i + 1], max_contig);
		}

		/* update hint and mark allocated */
		if (is_last)
			chunk->contig_hint = max_contig; /* fully scanned */
		else
			chunk->contig_hint = max(chunk->contig_hint,
						 max_contig);

		chunk->free_size -= chunk->map[i];
		chunk->map[i] = -chunk->map[i];

		pcpu_chunk_relocate(chunk, oslot);
		return off;
	}

	chunk->contig_hint = max_contig;	/* fully scanned */
	pcpu_chunk_relocate(chunk, oslot);

	/* tell the upper layer that this chunk has no matching area */
	return -1;
}
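/*
 * Note (added commentary, not in the original file): chunk->map encodes
 * a unit as a sequence of run lengths whose signs carry state: a
 * positive entry is a free area, a negative entry an allocated one, and
 * an area's offset is the sum of abs() of all preceding entries.  For
 * example the map { -128, 64, -32, 3872 } describes 128 bytes allocated
 * at offset 0, 64 free at 128, 32 allocated at 192 and the rest free.
 */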
static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
{
	int oslot = pcpu_chunk_slot(chunk);
	int i, off;

	for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++]))
		if (off == freeme)
			break;
	BUG_ON(off != freeme);
	BUG_ON(chunk->map[i] > 0);

	chunk->map[i] = -chunk->map[i];
	chunk->free_size += chunk->map[i];

	/* merge with previous? */
	if (i > 0 && chunk->map[i - 1] >= 0) {
		chunk->map[i - 1] += chunk->map[i];
		chunk->map_used--;
		memmove(&chunk->map[i], &chunk->map[i + 1],
			(chunk->map_used - i) * sizeof(chunk->map[0]));
		i--;
	}
	/* merge with next? */
	if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) {
		chunk->map[i] += chunk->map[i + 1];
		chunk->map_used--;
		memmove(&chunk->map[i + 1], &chunk->map[i + 2],
			(chunk->map_used - (i + 1)) * sizeof(chunk->map[0]));
	}

	chunk->contig_hint = max(chunk->map[i], chunk->contig_hint);
	pcpu_chunk_relocate(chunk, oslot);
}
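/*
 * Note (added commentary, not in the original file): freeing flips the
 * entry's sign back to positive and then coalesces with the free
 * neighbours on both sides, so the map never holds two adjacent free
 * entries and stays as short as fragmentation allows.
 */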
static struct pcpu_chunk *pcpu_alloc_chunk(void)
{
	struct pcpu_chunk *chunk;

	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
	if (!chunk)
		return NULL;

	chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
				     sizeof(chunk->map[0]));
	if (!chunk->map) {
		kfree(chunk);
		return NULL;
	}

	chunk->map_alloc = PCPU_DFL_MAP_ALLOC;
	chunk->map[chunk->map_used++] = pcpu_unit_size;

	INIT_LIST_HEAD(&chunk->list);
	chunk->free_size = pcpu_unit_size;
	chunk->contig_hint = pcpu_unit_size;

	return chunk;
}
static void pcpu_free_chunk(struct pcpu_chunk *chunk)
{
	if (!chunk)
		return;
	pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]));
	kfree(chunk);
}
static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
static struct pcpu_chunk *pcpu_create_chunk(void);
static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
static struct page *pcpu_addr_to_page(void *addr);
#ifdef CONFIG_NEED_PER_CPU_KM
#include "percpu-km.c"
#else
#include "percpu-vm.c"
#endif
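/*
 * Note (added commentary, not in the original file): percpu-vm.c backs
 * chunks with vmalloc space and populates pages on demand, while
 * percpu-km.c, for nommu/constrained configurations, allocates whole
 * chunks with the page allocator.  Whichever is included supplies the
 * chunk operations declared above.
 */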
static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
{
	/* is it in the first chunk? */
	if (pcpu_addr_in_first_chunk(addr)) {
		/* is it in the reserved area? */
		if (pcpu_addr_in_reserved_chunk(addr))
			return pcpu_reserved_chunk;
		return pcpu_first_chunk;
	}

	/*
	 * The address is relative to unit0 which might be unused and
	 * thus unmapped.  Offset the address to the unit space of the
	 * current processor before looking it up in the vmalloc area.
	 */
	addr += pcpu_unit_offsets[raw_smp_processor_id()];
	return pcpu_get_page_chunk(pcpu_addr_to_page(addr));
}
static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
{
	static int warn_limit = 10;
	struct pcpu_chunk *chunk;
	const char *err;
	int slot, off, new_alloc;
	unsigned long flags;

	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
		WARN(true, "illegal size (%zu) or align (%zu) for "
		     "percpu allocation\n", size, align);
		return NULL;
	}

	mutex_lock(&pcpu_alloc_mutex);
	spin_lock_irqsave(&pcpu_lock, flags);

	/* serve reserved allocations from the reserved chunk if available */
	if (reserved && pcpu_reserved_chunk) {
		chunk = pcpu_reserved_chunk;

		if (size > chunk->contig_hint) {
			err = "alloc from reserved chunk failed";
			goto fail_unlock;
		}

		while ((new_alloc = pcpu_need_to_extend(chunk))) {
			spin_unlock_irqrestore(&pcpu_lock, flags);
			if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
				err = "failed to extend area map of reserved chunk";
				goto fail_unlock_mutex;
			}
			spin_lock_irqsave(&pcpu_lock, flags);
		}

		off = pcpu_alloc_area(chunk, size, align);
		if (off >= 0)
			goto area_found;

		err = "alloc from reserved chunk failed";
		goto fail_unlock;
	}

restart:
	/* search through normal chunks */
	for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {
		list_for_each_entry(chunk, &pcpu_slot[slot], list) {
			if (size > chunk->contig_hint)
				continue;

			new_alloc = pcpu_need_to_extend(chunk);
			if (new_alloc) {
				spin_unlock_irqrestore(&pcpu_lock, flags);
				if (pcpu_extend_area_map(chunk,
							 new_alloc) < 0) {
					err = "failed to extend area map";
					goto fail_unlock_mutex;
				}
				spin_lock_irqsave(&pcpu_lock, flags);
				/*
				 * pcpu_lock has been dropped, need to
				 * restart cpu_slot list walking.
				 */
				goto restart;
			}

			off = pcpu_alloc_area(chunk, size, align);
			if (off >= 0)
				goto area_found;
		}
	}

	/* hmmm... no space left, create a new chunk */
	spin_unlock_irqrestore(&pcpu_lock, flags);

	chunk = pcpu_create_chunk();
	if (!chunk) {
		err = "failed to allocate new chunk";
		goto fail_unlock_mutex;
	}

	spin_lock_irqsave(&pcpu_lock, flags);
	pcpu_chunk_relocate(chunk, -1);
	goto restart;

area_found:
	spin_unlock_irqrestore(&pcpu_lock, flags);

	/* populate, map and clear the area */
	if (pcpu_populate_chunk(chunk, off, size)) {
		spin_lock_irqsave(&pcpu_lock, flags);
		pcpu_free_area(chunk, off);
		err = "failed to populate";
		goto fail_unlock;
	}

	mutex_unlock(&pcpu_alloc_mutex);

	/* return address relative to base address */
	return __addr_to_pcpu_ptr(chunk->base_addr + off);

fail_unlock:
	spin_unlock_irqrestore(&pcpu_lock, flags);
fail_unlock_mutex:
	mutex_unlock(&pcpu_alloc_mutex);
	if (warn_limit) {
		pr_warning("PERCPU: allocation failed, size=%zu align=%zu, "
			   "%s\n", size, align, err);
		dump_stack();
		if (!--warn_limit)
			pr_info("PERCPU: limit reached, disable warning\n");
	}
	return NULL;
}
void __percpu *__alloc_percpu(size_t size, size_t align)
{
	return pcpu_alloc(size, align, false);
}
EXPORT_SYMBOL_GPL(__alloc_percpu);

void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
{
	return pcpu_alloc(size, align, true);
}
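/*
 * Usage sketch (added commentary, not in the original file; my_counters
 * is a made-up example type):
 *
 *	struct my_counters {
 *		u64 events;
 *	};
 *	struct my_counters __percpu *ctrs;
 *
 *	ctrs = alloc_percpu(struct my_counters);   // wraps __alloc_percpu()
 *	if (!ctrs)
 *		return -ENOMEM;
 *	this_cpu_inc(ctrs->events);                // touch this CPU's copy
 *	free_percpu(ctrs);
 */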
static void pcpu_reclaim(struct work_struct *work)
{
	LIST_HEAD(todo);
	struct list_head *head = &pcpu_slot[pcpu_nr_slots - 1];
	struct pcpu_chunk *chunk, *next;

	mutex_lock(&pcpu_alloc_mutex);
	spin_lock_irq(&pcpu_lock);

	list_for_each_entry_safe(chunk, next, head, list) {
		WARN_ON(chunk->immutable);

		/* spare the first one */
		if (chunk == list_first_entry(head, struct pcpu_chunk, list))
			continue;

		list_move(&chunk->list, &todo);
	}

	spin_unlock_irq(&pcpu_lock);

	list_for_each_entry_safe(chunk, next, &todo, list) {
		pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size);
		pcpu_destroy_chunk(chunk);
	}

	mutex_unlock(&pcpu_alloc_mutex);
}
static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim);
void free_percpu(void __percpu *ptr)
{
	void *addr;
	struct pcpu_chunk *chunk;
	unsigned long flags;
	int off;

	if (!ptr)
		return;

	addr = __pcpu_ptr_to_addr(ptr);

	spin_lock_irqsave(&pcpu_lock, flags);

	chunk = pcpu_chunk_addr_search(addr);
	off = addr - chunk->base_addr;

	pcpu_free_area(chunk, off);

	/* if there are more than one fully free chunks, wake up grim reaper */
	if (chunk->free_size == pcpu_unit_size) {
		struct pcpu_chunk *pos;

		list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list)
			if (pos != chunk) {
				schedule_work(&pcpu_reclaim_work);
				break;
			}
	}

	spin_unlock_irqrestore(&pcpu_lock, flags);
}
EXPORT_SYMBOL_GPL(free_percpu);
bool is_kernel_percpu_address(unsigned long addr)
{
#ifdef CONFIG_SMP
	const size_t static_size = __per_cpu_end - __per_cpu_start;
	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		void *start = per_cpu_ptr(base, cpu);

		if ((void *)addr >= start && (void *)addr < start + static_size)
			return true;
	}
#endif
	/* on UP, can't distinguish from other static vars, always false */
	return false;
}
phys_addr_t per_cpu_ptr_to_phys(void *addr)
{
	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
	bool in_first_chunk = false;
	unsigned long first_low, first_high;
	unsigned int cpu;

	/*
	 * The following test on unit_low/high isn't strictly
	 * necessary but will speed up lookups of addresses which
	 * aren't in the first chunk.
	 */
	first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0);
	first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu,
				     pcpu_unit_pages);
	if ((unsigned long)addr >= first_low &&
	    (unsigned long)addr < first_high) {
		for_each_possible_cpu(cpu) {
			void *start = per_cpu_ptr(base, cpu);

			if (addr >= start && addr < start + pcpu_unit_size) {
				in_first_chunk = true;
				break;
			}
		}
	}

	if (in_first_chunk) {
		if (!is_vmalloc_addr(addr))
			return __pa(addr);
		else
			return page_to_phys(vmalloc_to_page(addr)) +
			       offset_in_page(addr);
	} else
		return page_to_phys(pcpu_addr_to_page(addr)) +
		       offset_in_page(addr);
}
struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
						      int nr_units)
{
	struct pcpu_alloc_info *ai;
	size_t base_size, ai_size;
	void *ptr;
	int unit;

	base_size = ALIGN(sizeof(*ai) + nr_groups * sizeof(ai->groups[0]),
			  __alignof__(ai->groups[0].cpu_map[0]));
	ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]);

	ptr = alloc_bootmem_nopanic(PFN_ALIGN(ai_size));
	if (!ptr)
		return NULL;
	ai = ptr;
	ptr += base_size;

	ai->groups[0].cpu_map = ptr;

	for (unit = 0; unit < nr_units; unit++)
		ai->groups[0].cpu_map[unit] = NR_CPUS;

	ai->nr_groups = nr_groups;
	ai->__ai_size = PFN_ALIGN(ai_size);

	return ai;
}
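/*
 * Layout sketch (added commentary, not in the original file): the whole
 * alloc_info lives in one bootmem block,
 *
 *	[ struct pcpu_alloc_info | nr_groups group_info | padding |
 *	  nr_units cpu_map entries ]
 *
 * base_size covers everything up to the cpu_map array and is aligned to
 * the cpu_map element type; every slot starts out as NR_CPUS, meaning
 * "no CPU mapped to this unit yet".
 */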
static void pcpu_dump_alloc_info(const char *lvl,
				 const struct pcpu_alloc_info *ai)
{
	int group_width = 1, cpu_width = 1, width;
	char empty_str[] = "--------";
	int alloc = 0, alloc_end = 0;
	int group, v;
	int upa, apl;	/* units per alloc, allocs per line */

	v = ai->nr_groups;
	while (v /= 10)
		group_width++;

	v = num_possible_cpus();
	while (v /= 10)
		cpu_width++;
	empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0';

	upa = ai->alloc_size / ai->unit_size;
	width = upa * (cpu_width + 1) + group_width + 3;
	apl = rounddown_pow_of_two(max(60 / width, 1));

	printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu",
	       lvl, ai->static_size, ai->reserved_size, ai->dyn_size,
	       ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size);

	for (group = 0; group < ai->nr_groups; group++) {
		const struct pcpu_group_info *gi = &ai->groups[group];
		int unit = 0, unit_end = 0;

		BUG_ON(gi->nr_units % upa);
		for (alloc_end += gi->nr_units / upa;
		     alloc < alloc_end; alloc++) {
			if (!(alloc % apl)) {
				printk(KERN_CONT "\n");
				printk("%spcpu-alloc: ", lvl);
			}
			printk(KERN_CONT "[%0*d] ", group_width, group);

			for (unit_end += upa; unit < unit_end; unit++)
				if (gi->cpu_map[unit] != NR_CPUS)
					printk(KERN_CONT "%0*d ",
					       cpu_width, gi->cpu_map[unit]);
				else
					printk(KERN_CONT "%s ", empty_str);
		}
	}
	printk(KERN_CONT "\n");
}
int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
				  void *base_addr)
{
	static char cpus_buf[4096] __initdata;
	static int smap[PCPU_DFL_MAP_ALLOC] __initdata;
	static int dmap[PCPU_DFL_MAP_ALLOC] __initdata;
	size_t dyn_size = ai->dyn_size;
	size_t size_sum = ai->static_size + ai->reserved_size + dyn_size;
	struct pcpu_chunk *schunk, *dchunk = NULL;
	unsigned long *group_offsets;
	size_t *group_sizes;
	unsigned long *unit_off;
	unsigned int cpu;
	int *unit_map;
	int group, unit, i;

#define PCPU_SETUP_BUG_ON(cond)	do {					\
	if (unlikely(cond)) {						\
		pr_emerg("PERCPU: failed to initialize, %s", #cond);	\
		pr_emerg("PERCPU: cpu_possible_mask=%s\n", cpus_buf);	\
		pcpu_dump_alloc_info(KERN_EMERG, ai);			\
		BUG();							\
	}								\
} while (0)

	cpumask_scnprintf(cpus_buf, sizeof(cpus_buf), cpu_possible_mask);
	/* process group information and build config tables accordingly */
	group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0]));
	group_sizes = alloc_bootmem(ai->nr_groups * sizeof(group_sizes[0]));
	unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0]));
	unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0]));

	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
		unit_map[cpu] = UINT_MAX;

	pcpu_low_unit_cpu = NR_CPUS;
	pcpu_high_unit_cpu = NR_CPUS;

	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
		const struct pcpu_group_info *gi = &ai->groups[group];

		group_offsets[group] = gi->base_offset;
		group_sizes[group] = gi->nr_units * ai->unit_size;

		for (i = 0; i < gi->nr_units; i++) {
			cpu = gi->cpu_map[i];
			if (cpu == NR_CPUS)
				continue;

			PCPU_SETUP_BUG_ON(cpu > nr_cpu_ids);
			PCPU_SETUP_BUG_ON(!cpu_possible(cpu));
			PCPU_SETUP_BUG_ON(unit_map[cpu] != UINT_MAX);

			unit_map[cpu] = unit + i;
			unit_off[cpu] = gi->base_offset + i * ai->unit_size;

			/* determine low/high unit_cpu */
			if (pcpu_low_unit_cpu == NR_CPUS ||
			    unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
				pcpu_low_unit_cpu = cpu;
			if (pcpu_high_unit_cpu == NR_CPUS ||
			    unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
				pcpu_high_unit_cpu = cpu;
		}
	}
	pcpu_nr_units = unit;
	for_each_possible_cpu(cpu)
		PCPU_SETUP_BUG_ON(unit_map[cpu] == UINT_MAX);

	/* we're done parsing the input, undefine BUG macro and dump config */
#undef PCPU_SETUP_BUG_ON
	pcpu_dump_alloc_info(KERN_DEBUG, ai);
	pcpu_nr_groups = ai->nr_groups;
	pcpu_group_offsets = group_offsets;
	pcpu_group_sizes = group_sizes;
	pcpu_unit_map = unit_map;
	pcpu_unit_offsets = unit_off;

	/* determine basic parameters */
	pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT;
	pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
	pcpu_atom_size = ai->atom_size;
	pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) +
		BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long);

	/*
	 * Allocate chunk slots.  The additional last slot is for
	 * empty chunks.
	 */
	pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
	pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0]));
	for (i = 0; i < pcpu_nr_slots; i++)
		INIT_LIST_HEAD(&pcpu_slot[i]);
	/*
	 * Initialize static chunk.  If reserved_size is non-zero, the
	 * static chunk covers static area + reserved area; otherwise
	 * it also covers the dynamic area.
	 */
	schunk = alloc_bootmem(pcpu_chunk_struct_size);
	INIT_LIST_HEAD(&schunk->list);
	schunk->base_addr = base_addr;
	schunk->map = smap;
	schunk->map_alloc = ARRAY_SIZE(smap);
	schunk->immutable = true;
	bitmap_fill(schunk->populated, pcpu_unit_pages);

	if (ai->reserved_size) {
		schunk->free_size = ai->reserved_size;
		pcpu_reserved_chunk = schunk;
		pcpu_reserved_chunk_limit = ai->static_size + ai->reserved_size;
	} else {
		schunk->free_size = dyn_size;
		dyn_size = 0;			/* dynamic area covered */
	}
	schunk->contig_hint = schunk->free_size;

	schunk->map[schunk->map_used++] = -ai->static_size;
	if (schunk->free_size)
		schunk->map[schunk->map_used++] = schunk->free_size;

	/* init dynamic chunk if necessary */
	if (dyn_size) {
		dchunk = alloc_bootmem(pcpu_chunk_struct_size);
		INIT_LIST_HEAD(&dchunk->list);
		dchunk->base_addr = base_addr;
		dchunk->map = dmap;
		dchunk->map_alloc = ARRAY_SIZE(dmap);
		dchunk->immutable = true;
		bitmap_fill(dchunk->populated, pcpu_unit_pages);

		dchunk->contig_hint = dchunk->free_size = dyn_size;
		dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit;
		dchunk->map[dchunk->map_used++] = dchunk->free_size;
	}

	/* link the first chunk in */
	pcpu_first_chunk = dchunk ?: schunk;
	pcpu_chunk_relocate(pcpu_first_chunk, -1);

	/* we're done */
	pcpu_base_addr = base_addr;
	return 0;
}
static int __init percpu_alloc_setup(char *str)
{
	if (0)
		/* nada */;
#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
	else if (!strcmp(str, "embed"))
		pcpu_chosen_fc = PCPU_FC_EMBED;
#endif
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
	else if (!strcmp(str, "page"))
		pcpu_chosen_fc = PCPU_FC_PAGE;
#endif
	else
		pr_warning("PERCPU: unknown allocator %s specified\n", str);

	return 0;
}
early_param("percpu_alloc", percpu_alloc_setup);
#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
	!defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
#define BUILD_EMBED_FIRST_CHUNK
#endif

#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
#define BUILD_PAGE_FIRST_CHUNK
#endif

/* common parts used by both embed and page first chunk helpers */
#if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK)
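/*
 * Note (added commentary, not in the original file): the embed helper is
 * also what the generic setup_per_cpu_areas() below falls back on, so it
 * is built whenever an arch doesn't provide its own percpu setup; the
 * page-at-a-time helper is built only on explicit request.
 */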
static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
				size_t reserved_size, size_t dyn_size,
				size_t atom_size,
				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
{
	static int group_map[NR_CPUS] __initdata;
	static int group_cnt[NR_CPUS] __initdata;
	const size_t static_size = __per_cpu_end - __per_cpu_start;
	int nr_groups = 1, nr_units = 0;
	size_t size_sum, min_unit_size, alloc_size;
	int upa, max_upa;	/* units_per_alloc */
	int last_allocs, best_upa, group, unit;
	unsigned int cpu, tcpu;
	struct pcpu_alloc_info *ai;
	unsigned int *cpu_map;

	/* this function may be called multiple times */
	memset(group_map, 0, sizeof(group_map));
	memset(group_cnt, 0, sizeof(group_cnt));

	/* calculate size_sum and ensure dyn_size is enough for early alloc */
	size_sum = PFN_ALIGN(static_size + reserved_size +
			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
	dyn_size = size_sum - static_size - reserved_size;

	/*
	 * Determine the largest units-per-allocation such that
	 * alloc_size is a multiple of atom_size and divides into
	 * page-aligned units.
	 */
	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);

	alloc_size = roundup(min_unit_size, atom_size);
	upa = alloc_size / min_unit_size;
	while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
		upa--;
	max_upa = upa;

	/* group cpus according to their proximity */
	for_each_possible_cpu(cpu) {
		group = 0;
	next_group:
		for_each_possible_cpu(tcpu) {
			if (cpu == tcpu)
				break;
			if (group_map[tcpu] == group && cpu_distance_fn &&
			    (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
			     cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
				group++;
				nr_groups = max(nr_groups, group + 1);
				goto next_group;
			}
		}
		group_map[cpu] = group;
		group_cnt[group]++;
	}

	/*
	 * Pick the largest upa which keeps wastage in check and
	 * doesn't consume more allocations than a smaller one.
	 */
	last_allocs = INT_MAX;
	best_upa = 0;
	for (upa = max_upa; upa; upa--) {
		int allocs = 0, wasted = 0;

		if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
			continue;

		for (group = 0; group < nr_groups; group++) {
			int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
			allocs += this_allocs;
			wasted += this_allocs * upa - group_cnt[group];
		}

		/* don't accept if wastage is over 1/3 */
		if (wasted > num_possible_cpus() / 3)
			continue;

		/* and then don't consume more memory */
		if (allocs > last_allocs)
			break;
		last_allocs = allocs;
		best_upa = upa;
	}
	upa = best_upa;

	/* allocate and fill alloc_info */
	for (group = 0; group < nr_groups; group++)
		nr_units += roundup(group_cnt[group], upa);

	ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
	if (!ai)
		return ERR_PTR(-ENOMEM);
	cpu_map = ai->groups[0].cpu_map;

	for (group = 0; group < nr_groups; group++) {
		ai->groups[group].cpu_map = cpu_map;
		cpu_map += roundup(group_cnt[group], upa);
	}

	ai->static_size = static_size;
	ai->reserved_size = reserved_size;
	ai->dyn_size = dyn_size;
	ai->unit_size = alloc_size / upa;
	ai->atom_size = atom_size;
	ai->alloc_size = alloc_size;

	for (group = 0, unit = 0; group_cnt[group]; group++) {
		struct pcpu_group_info *gi = &ai->groups[group];

		/*
		 * base_offset is set assuming groups are packed
		 * back-to-back; first-chunk helpers update it to the
		 * actual offsets later.
		 */
		gi->base_offset = unit * ai->unit_size;

		for_each_possible_cpu(cpu)
			if (group_map[cpu] == group)
				gi->cpu_map[gi->nr_units++] = cpu;
		gi->nr_units = roundup(gi->nr_units, upa);
		unit += gi->nr_units;
	}
	BUG_ON(unit != nr_units);

	return ai;
}
#endif	/* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
#if defined(BUILD_EMBED_FIRST_CHUNK)
int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
				  size_t atom_size,
				  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
				  pcpu_fc_alloc_fn_t alloc_fn,
				  pcpu_fc_free_fn_t free_fn)
{
	void *base = (void *)ULONG_MAX;
	void **areas = NULL;
	struct pcpu_alloc_info *ai;
	size_t size_sum, areas_size, max_distance;
	int group, i, rc;

	ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size,
				   cpu_distance_fn);
	if (IS_ERR(ai))
		return PTR_ERR(ai);

	size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
	areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));

	areas = alloc_bootmem_nopanic(areas_size);
	if (!areas) {
		rc = -ENOMEM;
		goto out_free;
	}

	/* allocate, copy and determine base address */
	for (group = 0; group < ai->nr_groups; group++) {
		struct pcpu_group_info *gi = &ai->groups[group];
		unsigned int cpu = NR_CPUS;
		void *ptr;

		for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++)
			cpu = gi->cpu_map[i];
		BUG_ON(cpu == NR_CPUS);

		/* allocate space for the whole group */
		ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
		if (!ptr) {
			rc = -ENOMEM;
			goto out_free_areas;
		}
		areas[group] = ptr;

		base = min(ptr, base);
	}

	/* copy static data and free the unused parts of each unit */
	for (group = 0; group < ai->nr_groups; group++) {
		struct pcpu_group_info *gi = &ai->groups[group];
		void *ptr = areas[group];

		for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
			if (gi->cpu_map[i] == NR_CPUS) {
				/* unused unit, free whole */
				free_fn(ptr, ai->unit_size);
				continue;
			}
			/* copy and return the unused part */
			memcpy(ptr, __per_cpu_load, ai->static_size);
			free_fn(ptr + size_sum, ai->unit_size - size_sum);
		}
	}

	/* base address is now known, determine group base offsets */
	max_distance = 0;
	for (group = 0; group < ai->nr_groups; group++) {
		ai->groups[group].base_offset = areas[group] - base;
		max_distance = max_t(size_t, max_distance,
				     ai->groups[group].base_offset);
	}
	max_distance += ai->unit_size;

	/* warn if maximum distance is further than 75% of vmalloc space */
	if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) {
		pr_warning("PERCPU: max_distance=0x%zx too large for vmalloc "
			   "space 0x%lx\n", max_distance,
			   (unsigned long)(VMALLOC_END - VMALLOC_START));
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
		/* and fail if we have fallback */
		rc = -EINVAL;
		goto out_free;
#endif
	}

	pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n",
		PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size,
		ai->dyn_size, ai->unit_size);

	rc = pcpu_setup_first_chunk(ai, base);
	goto out_free;

out_free_areas:
	for (group = 0; group < ai->nr_groups; group++)
		free_fn(areas[group],
			ai->groups[group].nr_units * ai->unit_size);
out_free:
	pcpu_free_alloc_info(ai);
	if (areas)
		free_bootmem(__pa(areas), areas_size);
	return rc;
}
#endif	/* BUILD_EMBED_FIRST_CHUNK */
#ifdef BUILD_PAGE_FIRST_CHUNK
int __init pcpu_page_first_chunk(size_t reserved_size,
				 pcpu_fc_alloc_fn_t alloc_fn,
				 pcpu_fc_free_fn_t free_fn,
				 pcpu_fc_populate_pte_fn_t populate_pte_fn)
{
	static struct vm_struct vm;
	struct pcpu_alloc_info *ai;
	char psize_str[16];
	int unit_pages;
	size_t pages_size;
	struct page **pages;
	int unit, i, j, rc;

	snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);

	ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL);
	if (IS_ERR(ai))
		return PTR_ERR(ai);
	BUG_ON(ai->nr_groups != 1);
	BUG_ON(ai->groups[0].nr_units != num_possible_cpus());

	unit_pages = ai->unit_size >> PAGE_SHIFT;

	/* unaligned allocations can't be freed, round up to page size */
	pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() *
			       sizeof(pages[0]));
	pages = alloc_bootmem(pages_size);

	/* allocate pages */
	j = 0;
	for (unit = 0; unit < num_possible_cpus(); unit++)
		for (i = 0; i < unit_pages; i++) {
			unsigned int cpu = ai->groups[0].cpu_map[unit];
			void *ptr;

			ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
			if (!ptr) {
				pr_warning("PERCPU: failed to allocate %s page "
					   "for cpu%u\n", psize_str, cpu);
				goto enomem;
			}
			pages[j++] = virt_to_page(ptr);
		}

	/* allocate vm area, map the pages and copy static data */
	vm.flags = VM_ALLOC;
	vm.size = num_possible_cpus() * ai->unit_size;
	vm_area_register_early(&vm, PAGE_SIZE);

	for (unit = 0; unit < num_possible_cpus(); unit++) {
		unsigned long unit_addr =
			(unsigned long)vm.addr + unit * ai->unit_size;

		for (i = 0; i < unit_pages; i++)
			populate_pte_fn(unit_addr + (i << PAGE_SHIFT));

		/* pte already populated, the following shouldn't fail */
		rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
				      unit_pages);
		if (rc < 0)
			panic("failed to map percpu area, err=%d\n", rc);

		/* copy static data */
		memcpy((void *)unit_addr, __per_cpu_load, ai->static_size);
	}

	/* we're ready, commit */
	pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n",
		unit_pages, psize_str, vm.addr, ai->static_size,
		ai->reserved_size, ai->dyn_size);

	rc = pcpu_setup_first_chunk(ai, vm.addr);
	goto out_free_ar;

enomem:
	while (--j >= 0)
		free_fn(page_address(pages[j]), PAGE_SIZE);
	rc = -ENOMEM;
out_free_ar:
	free_bootmem(__pa(pages), pages_size);
	pcpu_free_alloc_info(ai);
	return rc;
}
#endif	/* BUILD_PAGE_FIRST_CHUNK */
#ifndef	CONFIG_HAVE_SETUP_PER_CPU_AREA
/*
 * Generic SMP percpu area setup.
 */
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);

static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
				       size_t align)
{
	return __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS));
}

static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
{
	free_bootmem(__pa(ptr), size);
}

void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc;

	/*
	 * Always reserve area for module percpu variables.  That's
	 * what the legacy allocator did.
	 */
	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
				    pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
	if (rc < 0)
		panic("Failed to initialize percpu areas.");

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
}
#endif	/* CONFIG_HAVE_SETUP_PER_CPU_AREA */
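/*
 * Note (added commentary, not in the original file): __per_cpu_offset[]
 * is what the generic per_cpu() accessors add to a percpu pointer, e.g.
 *
 *	#define per_cpu_offset(x) (__per_cpu_offset[x])
 *	... SHIFT_PERCPU_PTR(ptr, per_cpu_offset(cpu)) ...
 *
 * so delta folds the pcpu_base_addr/__per_cpu_start translation into
 * each CPU's offset once, at boot.
 */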
#ifndef CONFIG_SMP
/*
 * UP percpu area setup.
 */
void __init setup_per_cpu_areas(void)
{
	const size_t unit_size =
		roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE,
					 PERCPU_DYNAMIC_RESERVE));
	struct pcpu_alloc_info *ai;
	void *fc;

	ai = pcpu_alloc_alloc_info(1, 1);
	fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
	if (!ai || !fc)
		panic("Failed to allocate memory for percpu areas.");

	ai->dyn_size = unit_size;
	ai->unit_size = unit_size;
	ai->atom_size = unit_size;
	ai->alloc_size = unit_size;
	ai->groups[0].nr_units = 1;
	ai->groups[0].cpu_map[0] = 0;

	if (pcpu_setup_first_chunk(ai, fc) < 0)
		panic("Failed to initialize percpu areas.");
}
#endif	/* !CONFIG_SMP */
/*
 * The first and reserved chunks are initialized with temporary area
 * maps in initdata so that they can be used before slab is online.
 * This function replaces them with properly allocated maps.
 */
void __init percpu_init_late(void)
{
	struct pcpu_chunk *target_chunks[] =
		{ pcpu_first_chunk, pcpu_reserved_chunk, NULL };
	struct pcpu_chunk *chunk;
	unsigned long flags;
	int i;

	for (i = 0; (chunk = target_chunks[i]); i++) {
		int *map;
		const size_t size = PERCPU_DYNAMIC_EARLY_SLOTS * sizeof(map[0]);

		BUILD_BUG_ON(size > PAGE_SIZE);

		map = pcpu_mem_zalloc(size);
		BUG_ON(!map);

		spin_lock_irqsave(&pcpu_lock, flags);
		memcpy(map, chunk->map, size);
		chunk->map = map;
		spin_unlock_irqrestore(&pcpu_lock, flags);
	}
}