#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/sched.h>
#ifndef CONFIG_MEMORY_HOTPLUG

static int numa_distance_cnt;
static u8 *numa_distance;

#ifdef CONFIG_NUMA_EMU
#ifdef CONFIG_ACPI_NUMA
return __apicid_to_node[apicid];
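/*
 * Added note (not in the original source): __apicid_to_node[] is filled
 * in by the firmware NUMA parsers (e.g. the ACPI SRAT code) via
 * set_apicid_to_node() before the CPUs are brought up, so the lookup
 * above resolves a CPU's APIC ID straight to its home node.
 */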
if (cpu_to_node_map) {

#ifdef CONFIG_DEBUG_PER_CPU_MAPS
alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);

pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
pr_warning("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
	   nid, start, end - 1);
pr_err("NUMA: too many memblk ranges\n");
return numa_add_memblk_to(nid, start, end, &numa_meminfo);
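/*
 * Illustrative sketch (hypothetical, not part of this file): a platform
 * enumerator calling the helper above is expected to mark the node as
 * parsed and register each physical range it finds.  The node id and
 * address range below are made up for the example.
 */
static int __init example_register_node1(void)
{
	node_set(1, numa_nodes_parsed);			/* node 1 exists */
	return numa_add_memblk(1, 0x100000000ULL,	/* [4 GiB, 8 GiB) */
			       0x200000000ULL);
}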
static void __init setup_node_data(int nid, u64 start, u64 end)
bool remapped = false;

if (end && (end - start) < NODE_MIN_SIZE)
	return;

start = roundup(start, ZONE_ALIGN);

printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
       nid, start, end - 1);
pr_err("Cannot find %zu bytes in node %d\n",
       nd_size, nid);
printk(KERN_INFO "  NODE_DATA [mem %#010Lx-%#010Lx]%s\n",
       nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : "");
if (!remapped && tnid != nid)
	printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nid, tnid);
for (i = 0; i < mi->nr_blks; i++) {
for (i = 0; i < mi->nr_blks; i++) {
	for (j = i + 1; j < mi->nr_blks; j++) {
pr_err("NUMA: node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
       bi->nid, bi->start, bi->end - 1,
       bj->nid, bj->start, bj->end - 1);
pr_warning("NUMA: Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
	   bi->nid, bi->start, bi->end - 1,
	   bj->start, bj->end - 1);
for (k = 0; k < mi->nr_blks; k++) {
if (start < bk->end && end > bk->start)
	break;
printk(KERN_INFO "NUMA: Node %d [mem %#010Lx-%#010Lx] + [mem %#010Lx-%#010Lx] -> [mem %#010Lx-%#010Lx]\n",
       bi->nid, bi->start, bi->end - 1, bj->start,
       bj->end - 1, start, end - 1);
mi->blk[i].start = mi->blk[i].end = 0;
if (mi->blk[i].start != mi->blk[i].end &&
    mi->blk[i].nid != NUMA_NO_NODE)
	node_set(mi->blk[i].nid, *nodemask);
size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
if (numa_distance_cnt)
	memblock_free(__pa(numa_distance), size);

numa_distance_cnt = 0;
numa_distance = NULL;
static int __init numa_alloc_distance(void)
nodes_parsed = numa_nodes_parsed;
numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
size = cnt * cnt * sizeof(numa_distance[0]);
pr_warning("NUMA: Warning: can't allocate distance table!\n");

numa_distance = (void *)1LU;
numa_distance = __va(phys);
numa_distance_cnt = cnt;
for (i = 0; i < cnt; i++)
	for (j = 0; j < cnt; j++)
		numa_distance[i * cnt + j] = i == j ?
			LOCAL_DISTANCE : REMOTE_DISTANCE;
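/*
 * Added sketch (hypothetical helper, not in the original file): the table
 * built above is a flat row-major cnt x cnt byte matrix, so the entry for
 * (from, to) lives at numa_distance[from * cnt + to].  A debug walk over
 * it would look like this:
 */
static void __init numa_dump_distance(void)
{
	int i, j;

	for (i = 0; i < numa_distance_cnt; i++)
		for (j = 0; j < numa_distance_cnt; j++)
			pr_debug("distance %d -> %d: %d\n", i, j,
				 numa_distance[i * numa_distance_cnt + j]);
}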
if (!numa_distance && numa_alloc_distance() < 0)
	return;
if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
    from < 0 || to < 0) {
	pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
		     from, to, distance);
	return;
}
if ((u8)distance != distance ||
    (from == to && distance != LOCAL_DISTANCE)) {
	pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
		     from, to, distance);
	return;
}
numa_distance[from * numa_distance_cnt + to] = distance;
if (from >= numa_distance_cnt || to >= numa_distance_cnt)
	return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
return numa_distance[from * numa_distance_cnt + to];
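/*
 * Usage note (added commentary): generic code reaches __node_distance()
 * through the node_distance() wrapper.  On a machine whose firmware
 * supplied no SLIT, the default initialization above means
 * node_distance(0, 0) == LOCAL_DISTANCE (10) and
 * node_distance(0, 1) == REMOTE_DISTANCE (20).
 */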
u64 numaram, e820ram;

for (i = 0; i < mi->nr_blks; i++) {
	numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
	if ((s64)numaram < 0)
		numaram = 0;
printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
       (numaram << PAGE_SHIFT) >> 20,
       (e820ram << PAGE_SHIFT) >> 20);
for (i = 0; i < mi->nr_blks; i++) {
#ifdef NODE_NOT_IN_PAGE_FLAGS
pfn_align = node_map_pfn_alignment();
if (pfn_align && pfn_align < PAGES_PER_SECTION) {
if (!numa_meminfo_cover_memory(mi))
	return -EINVAL;
for (i = 0; i < mi->nr_blks; i++) {
	if (nid != mi->blk[i].nid)
		continue;
	start = min(mi->blk[i].start, start);
	end = max(mi->blk[i].end, end);
}

if (start < end)
	setup_node_data(nid, start, end);
static void __init numa_init_array(void)
static int __init numa_init(int (*init_func)(void))
for (i = 0; i < nr_cpu_ids; i++) {
	int nid = early_cpu_to_node(i);
static int __init dummy_numa_init(void)

printk(KERN_INFO "%s\n",
       numa_off ? "NUMA turned off" : "No NUMA configuration found");
#ifdef CONFIG_X86_NUMAQ
	if (!numa_init(numaq_numa_init))
		return;
#endif
#ifdef CONFIG_ACPI_NUMA
	if (!numa_init(x86_acpi_numa_init))
		return;
#endif
#ifdef CONFIG_AMD_NUMA
	if (!numa_init(amd_numa_init))
		return;
#endif

numa_init(dummy_numa_init);
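/*
 * Illustrative sketch (hypothetical, not in the original file): every
 * init_func handed to numa_init() above follows the same contract as
 * dummy_numa_init() -- fill numa_nodes_parsed, register ranges with
 * numa_add_memblk() and return 0, or return an error so the next
 * fallback in the chain is tried.  A minimal single-node example:
 */
static int __init example_flat_numa_init(void)
{
	/* one node covering all of RAM */
	node_set(0, numa_nodes_parsed);
	return numa_add_memblk(0, 0, PFN_PHYS(max_pfn));
}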
static __init int find_near_online_node(int node)

node = find_near_online_node(node);
#ifndef CONFIG_DEBUG_PER_CPU_MAPS
# ifndef CONFIG_NUMA_EMU

cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);

cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
int __cpu_to_node(int cpu)

printk(KERN_WARNING
       "cpu_to_node(%d): usage too early!\n", cpu);

return per_cpu(x86_cpu_to_node_map, cpu);
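/*
 * Usage note (added commentary, best-effort): with
 * CONFIG_DEBUG_PER_CPU_MAPS the checked __cpu_to_node() above backs the
 * generic cpu_to_node(); callers that can run before the per-cpu areas
 * exist should use early_cpu_to_node() below instead.
 */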
int early_cpu_to_node(int cpu)

printk(KERN_WARNING
       "early_cpu_to_node(%d): no per_cpu area!\n", cpu);

return per_cpu(x86_cpu_to_node_map, cpu);
void debug_cpumask_set_cpu(int cpu, int node, bool enable)
mask = node_to_cpumask_map[node];
pr_err("node_to_cpumask_map[%i] NULL\n", node);
if (enable)
	cpumask_set_cpu(cpu, mask);
else
	cpumask_clear_cpu(cpu, mask);
cpulist_scnprintf(buf, sizeof(buf), mask);
printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
       enable ? "numa_add_cpu" : "numa_remove_cpu",
       cpu, node, buf);
# ifndef CONFIG_NUMA_EMU
static void __cpuinit numa_set_cpumask(int cpu, bool enable)

debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable);

numa_set_cpumask(cpu, true);

numa_set_cpumask(cpu, false);
803 "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
if (node_to_cpumask_map[node] == NULL) {
	printk(KERN_WARNING
	       "cpumask_of_node(%d): no node_to_cpumask_map!\n",
	       node);
	dump_stack();
	return cpu_online_mask;
}

return node_to_cpumask_map[node];
#ifdef CONFIG_MEMORY_HOTPLUG
int memory_add_physaddr_to_nid(u64 start)
int nid = mi->blk[0].nid;

for (i = 0; i < mi->nr_blks; i++)
	if (mi->blk[i].start <= start && mi->blk[i].end > start)
		nid = mi->blk[i].nid;
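/*
 * Usage note (added commentary): this lookup is why numa_meminfo loses
 * its __initdata marking when CONFIG_MEMORY_HOTPLUG is set (see the
 * #ifndef near the top of the file) -- hot-added memory must be mapped
 * back to a node id at runtime by scanning the saved memblk ranges, with
 * addresses covered by no range falling back to the first block's node.
 */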