Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
mmu.c
Go to the documentation of this file.
1 /*
2  * linux/arch/arm/mm/mmu.c
3  *
4  * Copyright (C) 1995-2005 Russell King
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10 #include <linux/module.h>
11 #include <linux/kernel.h>
12 #include <linux/errno.h>
13 #include <linux/init.h>
14 #include <linux/mman.h>
15 #include <linux/nodemask.h>
16 #include <linux/memblock.h>
17 #include <linux/fs.h>
18 #include <linux/vmalloc.h>
19 #include <linux/sizes.h>
20 
21 #include <asm/cp15.h>
22 #include <asm/cputype.h>
23 #include <asm/sections.h>
24 #include <asm/cachetype.h>
25 #include <asm/setup.h>
26 #include <asm/smp_plat.h>
27 #include <asm/tlb.h>
28 #include <asm/highmem.h>
29 #include <asm/system_info.h>
30 #include <asm/traps.h>
31 
32 #include <asm/mach/arch.h>
33 #include <asm/mach/map.h>
34 #include <asm/mach/pci.h>
35 
36 #include "mm.h"
37 
38 /*
39  * empty_zero_page is a special page that is used for
40  * zero-initialized data and COW.
41  */
43 EXPORT_SYMBOL(empty_zero_page);
44 
45 /*
46  * The pmd table for the upper-most set of pages.
47  */
49 
/*
 * Cache policy selectors.  Each value doubles as an index into
 * cache_policies[]: early_cachepolicy() stores the index of the matching
 * table entry in `cachepolicy', and build_mem_type_table() reads
 * cache_policies[cachepolicy].
 */
50 #define CPOLICY_UNCACHED 0
51 #define CPOLICY_BUFFERED 1
52 #define CPOLICY_WRITETHROUGH 2
53 #define CPOLICY_WRITEBACK 3
54 #define CPOLICY_WRITEALLOC 4
55 
/*
 * Boot-time selections consumed by build_mem_type_table().
 * cachepolicy defaults to the write-back entry; ecc_mask defaults to 0 and
 * is OR-ed into several prot_l1/prot_sect values when non-zero.
 */
56 static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
57 static unsigned int ecc_mask __initdata = 0;
60 
61 EXPORT_SYMBOL(pgprot_user);
62 EXPORT_SYMBOL(pgprot_kernel);
63 
64 struct cachepolicy {
65  const char policy[16];
66  unsigned int cr_mask;
69 };
70 
71 static struct cachepolicy cache_policies[] __initdata = {
72  {
73  .policy = "uncached",
74  .cr_mask = CR_W|CR_C,
75  .pmd = PMD_SECT_UNCACHED,
76  .pte = L_PTE_MT_UNCACHED,
77  }, {
78  .policy = "buffered",
79  .cr_mask = CR_C,
80  .pmd = PMD_SECT_BUFFERED,
81  .pte = L_PTE_MT_BUFFERABLE,
82  }, {
83  .policy = "writethrough",
84  .cr_mask = 0,
85  .pmd = PMD_SECT_WT,
86  .pte = L_PTE_MT_WRITETHROUGH,
87  }, {
88  .policy = "writeback",
89  .cr_mask = 0,
90  .pmd = PMD_SECT_WB,
91  .pte = L_PTE_MT_WRITEBACK,
92  }, {
93  .policy = "writealloc",
94  .cr_mask = 0,
95  .pmd = PMD_SECT_WBWA,
96  .pte = L_PTE_MT_WRITEALLOC,
97  }
98 };
99 
100 /*
101  * These are useful for identifying cache coherency
102  * problems by allowing the cache or the cache and
103  * writebuffer to be turned off. (Note: the write
104  * buffer should not be on and the cache off).
105  */
106 static int __init early_cachepolicy(char *p)
107 {
108  int i;
109 
110  for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
111  int len = strlen(cache_policies[i].policy);
112 
113  if (memcmp(p, cache_policies[i].policy, len) == 0) {
114  cachepolicy = i;
115  cr_alignment &= ~cache_policies[i].cr_mask;
116  cr_no_alignment &= ~cache_policies[i].cr_mask;
117  break;
118  }
119  }
120  if (i == ARRAY_SIZE(cache_policies))
121  printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
122  /*
123  * This restriction is partly to do with the way we boot; it is
124  * unpredictable to have memory mapped using two different sets of
125  * memory attributes (shared, type, and cache attribs). We can not
126  * change these attributes once the initial assembly has setup the
127  * page tables.
128  */
129  if (cpu_architecture() >= CPU_ARCH_ARMv6) {
130  printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
132  }
133  flush_cache_all();
134  set_cr(cr_alignment);
135  return 0;
136 }
137 early_param("cachepolicy", early_cachepolicy);
138 
139 static int __init early_nocache(char *__unused)
140 {
141  char *p = "buffered";
142  printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
143  early_cachepolicy(p);
144  return 0;
145 }
146 early_param("nocache", early_nocache);
147 
148 static int __init early_nowrite(char *__unused)
149 {
150  char *p = "uncached";
151  printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
152  early_cachepolicy(p);
153  return 0;
154 }
155 early_param("nowb", early_nowrite);
156 
157 #ifndef CONFIG_ARM_LPAE
158 static int __init early_ecc(char *p)
159 {
160  if (memcmp(p, "on", 2) == 0)
161  ecc_mask = PMD_PROTECTION;
162  else if (memcmp(p, "off", 3) == 0)
163  ecc_mask = 0;
164  return 0;
165 }
166 early_param("ecc", early_ecc);
167 #endif
168 
169 static int __init noalign_setup(char *__unused)
170 {
171  cr_alignment &= ~CR_A;
172  cr_no_alignment &= ~CR_A;
173  set_cr(cr_alignment);
174  return 1;
175 }
176 __setup("noalign", noalign_setup);
177 
178 #ifndef CONFIG_SMP
179 void adjust_cr(unsigned long mask, unsigned long set)
180 {
181  unsigned long flags;
182 
183  mask &= ~CR_A;
184 
185  set &= mask;
186 
187  local_irq_save(flags);
188 
189  cr_no_alignment = (cr_no_alignment & ~mask) | set;
190  cr_alignment = (cr_alignment & ~mask) | set;
191 
192  set_cr((get_cr() & ~mask) | set);
193 
194  local_irq_restore(flags);
195 }
196 #endif
197 
/*
 * Baseline attribute sets shared by the device entries of mem_types[].
 * The expansions are parenthesised so they remain a single operand when
 * combined with operators that bind tighter than `|' (e.g. `&').
 */
#define PROT_PTE_DEVICE		(L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN)
#define PROT_SECT_DEVICE	(PMD_TYPE_SECT|PMD_SECT_AP_WRITE)
200 
/*
 * Static defaults for every memory type, indexed by the MT_* constants.
 *
 *   .prot_pte  - attribute bits for page (PTE) entries; consumed by
 *                alloc_init_pte().
 *   .prot_l1   - attribute bits for a first-level entry that points at a
 *                page table; passed to early_pte_alloc().
 *   .prot_sect - attribute bits for section entries; a value of 0 makes
 *                alloc_init_section() fall back to page mappings.
 *   .domain    - domain number, folded into prot_l1/prot_sect through
 *                PMD_DOMAIN() at the end of build_mem_type_table().
 *
 * build_mem_type_table() adjusts these values at boot for the running
 * CPU and the selected cache policy.
 *
 * NOTE(review): the listing's own line numbering has gaps inside this
 * initialiser (e.g. 209->211, 238->240, 250->252, 266->268, 285->287,
 * 288->290); several expressions below end in a dangling `|'.  The
 * continuation lines appear to be missing from this copy - confirm
 * against the real file before relying on any entry.
 */
201 static struct mem_type mem_types[] = {
202  [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */
203  .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
204  L_PTE_SHARED,
205  .prot_l1 = PMD_TYPE_TABLE,
206  .prot_sect = PROT_SECT_DEVICE | PMD_SECT_S,
207  .domain = DOMAIN_IO,
208  },
209  [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
211  .prot_l1 = PMD_TYPE_TABLE,
212  .prot_sect = PROT_SECT_DEVICE,
213  .domain = DOMAIN_IO,
214  },
215  [MT_DEVICE_CACHED] = { /* ioremap_cached */
216  .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
217  .prot_l1 = PMD_TYPE_TABLE,
218  .prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB,
219  .domain = DOMAIN_IO,
220  },
221  [MT_DEVICE_WC] = { /* ioremap_wc */
222  .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
223  .prot_l1 = PMD_TYPE_TABLE,
224  .prot_sect = PROT_SECT_DEVICE,
225  .domain = DOMAIN_IO,
226  },
227  [MT_UNCACHED] = {
228  .prot_pte = PROT_PTE_DEVICE,
229  .prot_l1 = PMD_TYPE_TABLE,
230  .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
231  .domain = DOMAIN_IO,
232  },
 /* Section-only type: no prot_pte/prot_l1, so it cannot be page-mapped. */
233  [MT_CACHECLEAN] = {
234  .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
235  .domain = DOMAIN_KERNEL,
236  },
237 #ifndef CONFIG_ARM_LPAE
238  [MT_MINICLEAN] = {
240  .domain = DOMAIN_KERNEL,
241  },
242 #endif
243  [MT_LOW_VECTORS] = {
244  .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
245  L_PTE_RDONLY,
246  .prot_l1 = PMD_TYPE_TABLE,
247  .domain = DOMAIN_USER,
248  },
249  [MT_HIGH_VECTORS] = {
250  .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
 /* NOTE(review): continuation of the expression above is missing here. */
252  .prot_l1 = PMD_TYPE_TABLE,
253  .domain = DOMAIN_USER,
254  },
255  [MT_MEMORY] = {
256  .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
257  .prot_l1 = PMD_TYPE_TABLE,
258  .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
259  .domain = DOMAIN_KERNEL,
260  },
 /* Section-only type: no prot_pte/prot_l1. */
261  [MT_ROM] = {
262  .prot_sect = PMD_TYPE_SECT,
263  .domain = DOMAIN_KERNEL,
264  },
265  [MT_MEMORY_NONCACHED] = {
266  .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
 /* NOTE(review): continuation of the expression above is missing here. */
268  .prot_l1 = PMD_TYPE_TABLE,
269  .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
270  .domain = DOMAIN_KERNEL,
271  },
272  [MT_MEMORY_DTCM] = {
273  .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
274  L_PTE_XN,
275  .prot_l1 = PMD_TYPE_TABLE,
276  .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
277  .domain = DOMAIN_KERNEL,
278  },
 /* Page-only type: prot_sect is left 0, so section mappings are never used. */
279  [MT_MEMORY_ITCM] = {
280  .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
281  .prot_l1 = PMD_TYPE_TABLE,
282  .domain = DOMAIN_KERNEL,
283  },
284  [MT_MEMORY_SO] = {
285  .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
 /* NOTE(review): continuation of the expression above is missing here. */
287  .prot_l1 = PMD_TYPE_TABLE,
288  .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S |
 /* NOTE(review): continuation of the expression above is missing here. */
290  .domain = DOMAIN_KERNEL,
291  },
 /* Page-only type: prot_sect is left 0. */
292  [MT_MEMORY_DMA_READY] = {
293  .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
294  .prot_l1 = PMD_TYPE_TABLE,
295  .domain = DOMAIN_KERNEL,
296  },
297 };
298 
299 const struct mem_type *get_mem_type(unsigned int type)
300 {
301  return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
302 }
304 
305 /*
306  * Adjust the PMD section entries according to the CPU in use.
307  */
/*
 * build_mem_type_table() - finalise mem_types[] and the pgprot values for
 * the CPU that is actually running.
 *
 * Visible steps, in order:
 *   1. strip section bits older architectures do not have (TEX, Shared);
 *   2. set or clear PMD_BIT4 depending on the CPU family;
 *   3. choose device-memory encodings (XN, TEX/B bits) per architecture;
 *   4. fold the selected cache policy (cache_policies[cachepolicy]) and
 *      ecc_mask into the memory types, protection_map[], pgprot_user and
 *      pgprot_kernel;
 *   5. OR the domain number into every non-zero prot_l1/prot_sect.
 *
 * The order matters: later steps OR into values that earlier steps have
 * masked.
 *
 * NOTE(review): the listing's line numbering has gaps inside this
 * function (317->320, 320->323, 325->328, 330->332, 396->398, 433->437,
 * 452->456, 466->468, 473->475); the statements on those lines are not
 * present in this copy, so some conditionals below appear to have no body.
 */
308 static void __init build_mem_type_table(void)
309 {
310  struct cachepolicy *cp;
311  unsigned int cr = get_cr();
312  pteval_t user_pgprot, kern_pgprot, vecs_pgprot;
313  int cpu_arch = cpu_architecture();
314  int i;
315 
316  if (cpu_arch < CPU_ARCH_ARMv6) {
 /* NOTE(review): both preprocessor branches below are empty in this listing. */
317 #if defined(CONFIG_CPU_DCACHE_DISABLE)
320 #elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
323 #endif
324  }
325  if (cpu_arch < CPU_ARCH_ARMv5) {
 /* ECC protection bits are never applied before ARMv5. */
328  ecc_mask = 0;
329  }
 /*
  * NOTE(review): the statement controlled by this `if' is missing from
  * the listing; as written here it would govern the next `if' statement.
  */
330  if (is_smp())
332 
333  /*
334  * Strip out features not present on earlier architectures.
335  * Pre-ARMv5 CPUs don't have TEX bits. Pre-ARMv6 CPUs or those
336  * without extended page tables don't have the 'Shared' bit.
337  */
338  if (cpu_arch < CPU_ARCH_ARMv5)
339  for (i = 0; i < ARRAY_SIZE(mem_types); i++)
340  mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
341  if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
342  for (i = 0; i < ARRAY_SIZE(mem_types); i++)
343  mem_types[i].prot_sect &= ~PMD_SECT_S;
344 
345  /*
346  * ARMv5 and lower, bit 4 must be set for page tables (was: cache
347  * "update-able on write" bit on ARM610). However, Xscale and
348  * Xscale3 require this bit to be cleared.
349  */
350  if (cpu_is_xscale() || cpu_is_xsc3()) {
351  for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
352  mem_types[i].prot_sect &= ~PMD_BIT4;
353  mem_types[i].prot_l1 &= ~PMD_BIT4;
354  }
355  } else if (cpu_arch < CPU_ARCH_ARMv6) {
 /* Only touch entries that are in use: 0 means "mapping kind unsupported". */
356  for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
357  if (mem_types[i].prot_l1)
358  mem_types[i].prot_l1 |= PMD_BIT4;
359  if (mem_types[i].prot_sect)
360  mem_types[i].prot_sect |= PMD_BIT4;
361  }
362  }
363 
364  /*
365  * Mark the device areas according to the CPU/architecture.
366  */
367  if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
368  if (!cpu_is_xsc3()) {
369  /*
370  * Mark device regions on ARMv6+ as execute-never
371  * to prevent speculative instruction fetches.
372  */
373  mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
374  mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
375  mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
376  mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;
377  }
378  if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
379  /*
380  * For ARMv7 with TEX remapping,
381  * - shared device is SXCB=1100
382  * - nonshared device is SXCB=0100
383  * - write combine device mem is SXCB=0001
384  * (Uncached Normal memory)
385  */
386  mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
387  mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
388  mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
389  } else if (cpu_is_xsc3()) {
390  /*
391  * For Xscale3,
392  * - shared device is TEXCB=00101
393  * - nonshared device is TEXCB=01000
394  * - write combine device mem is TEXCB=00100
395  * (Inner/Outer Uncacheable in xsc3 parlance)
396  */
 /* NOTE(review): no MT_DEVICE assignment is visible in this branch. */
398  mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
399  mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
400  } else {
401  /*
402  * For ARMv6 and ARMv7 without TEX remapping,
403  * - shared device is TEXCB=00001
404  * - nonshared device is TEXCB=01000
405  * - write combine device mem is TEXCB=00100
406  * (Uncached Normal in ARMv6 parlance).
407  */
408  mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
409  mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
410  mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
411  }
412  } else {
413  /*
414  * On others, write combining is "Uncached/Buffered"
415  */
416  mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
417  }
418 
419  /*
420  * Now deal with the memory-type mappings
421  */
 /* All three pgprot seeds start from the PTE bits of the chosen policy. */
422  cp = &cache_policies[cachepolicy];
423  vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
424 
425  /*
426  * ARMv6 and above have extended page tables.
427  */
428  if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
429 #ifndef CONFIG_ARM_LPAE
430  /*
431  * Mark cache clean areas and XIP ROM read only
432  * from SVC mode and no access from userspace.
433  */
 /* NOTE(review): the statements this comment describes are missing here. */
437 #endif
438 
439  if (is_smp()) {
440  /*
441  * Mark memory with the "shared" attribute
442  * for SMP systems
443  */
444  user_pgprot |= L_PTE_SHARED;
445  kern_pgprot |= L_PTE_SHARED;
446  vecs_pgprot |= L_PTE_SHARED;
447  mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
448  mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
449  mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
450  mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
451  mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
452  mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
456  }
457  }
458 
459  /*
460  * Non-cacheable Normal - intended for memory areas that must
461  * not cause dirty cache line writebacks when used
462  */
463  if (cpu_arch >= CPU_ARCH_ARMv6) {
464  if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
465  /* Non-cacheable Normal is XCB = 001 */
 /* NOTE(review): the right-hand side of this `|=' is missing from the listing. */
466  mem_types[MT_MEMORY_NONCACHED].prot_sect |=
468  } else {
469  /* For both ARMv6 and non-TEX-remapping ARMv7 */
470  mem_types[MT_MEMORY_NONCACHED].prot_sect |=
471  PMD_SECT_TEX(1);
472  }
473  } else {
 /* NOTE(review): the pre-ARMv6 branch body is missing from the listing. */
475  }
476 
477 #ifdef CONFIG_ARM_LPAE
478  /*
479  * Do not generate access flag faults for the kernel mappings.
480  */
481  for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
482  mem_types[i].prot_pte |= PTE_EXT_AF;
483  if (mem_types[i].prot_sect)
484  mem_types[i].prot_sect |= PMD_SECT_AF;
485  }
486  kern_pgprot |= PTE_EXT_AF;
487  vecs_pgprot |= PTE_EXT_AF;
488 #endif
489 
 /* Apply the user policy bits to all 16 protection_map[] entries. */
490  for (i = 0; i < 16; i++) {
491  unsigned long v = pgprot_val(protection_map[i]);
492  protection_map[i] = __pgprot(v | user_pgprot);
493  }
494 
495  mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
496  mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;
497 
498  pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
499  pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
500  L_PTE_DIRTY | kern_pgprot);
501 
 /* Fold the ECC bits and the policy's section bits into the RAM/ROM types. */
502  mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
503  mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
504  mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
505  mem_types[MT_MEMORY].prot_pte |= kern_pgprot;
506  mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot;
507  mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask;
508  mem_types[MT_ROM].prot_sect |= cp->pmd;
509 
 /*
  * Cache-clean area: write-through stays write-through; both write-back
  * flavours map to plain write-back; other policies leave it unchanged.
  */
510  switch (cp->pmd) {
511  case PMD_SECT_WT:
512  mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
513  break;
514  case PMD_SECT_WB:
515  case PMD_SECT_WBWA:
516  mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
517  break;
518  }
519  printk("Memory policy: ECC %sabled, Data cache %s\n",
520  ecc_mask ? "en" : "dis", cp->policy);
521 
 /* Finally stamp the domain into every entry that is actually in use. */
522  for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
523  struct mem_type *t = &mem_types[i];
524  if (t->prot_l1)
525  t->prot_l1 |= PMD_DOMAIN(t->domain);
526  if (t->prot_sect)
527  t->prot_sect |= PMD_DOMAIN(t->domain);
528  }
529 }
530 
#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
/*
 * Choose the page protection for a physical-memory mapping.
 *
 * @file:     file being mapped; only its f_flags are examined.
 * @pfn:      first page frame of the mapping.
 * @size:     unused here.
 * @vma_prot: protection requested by the caller.
 *
 * Returns non-cached protection when @pfn is not a valid page frame,
 * write-combining protection when the frame is valid and the file was
 * opened with O_SYNC, and @vma_prot unchanged otherwise.
 */
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
			      unsigned long size, pgprot_t vma_prot)
{
	if (pfn_valid(pfn)) {
		if (file->f_flags & O_SYNC)
			return pgprot_writecombine(vma_prot);
		return vma_prot;
	}

	return pgprot_noncached(vma_prot);
}
#endif
543 
/*
 * Virtual address of the exception vectors: 0xffff0000 when
 * vectors_high() is true, otherwise 0.  create_mapping() uses it to allow
 * the one mapping that may legitimately sit below TASK_SIZE.
 */
544 #define vectors_base() (vectors_high() ? 0xffff0000 : 0)
545 
546 static void __init *early_alloc_aligned(unsigned long sz, unsigned long align)
547 {
548  void *ptr = __va(memblock_alloc(sz, align));
549  memset(ptr, 0, sz);
550  return ptr;
551 }
552 
553 static void __init *early_alloc(unsigned long sz)
554 {
555  return early_alloc_aligned(sz, sz);
556 }
557 
558 static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot)
559 {
560  if (pmd_none(*pmd)) {
561  pte_t *pte = early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE);
562  __pmd_populate(pmd, __pa(pte), prot);
563  }
564  BUG_ON(pmd_bad(*pmd));
565  return pte_offset_kernel(pmd, addr);
566 }
567 
568 static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
569  unsigned long end, unsigned long pfn,
570  const struct mem_type *type)
571 {
572  pte_t *pte = early_pte_alloc(pmd, addr, type->prot_l1);
573  do {
574  set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
575  pfn++;
576  } while (pte++, addr += PAGE_SIZE, addr != end);
577 }
578 
579 static void __init alloc_init_section(pud_t *pud, unsigned long addr,
580  unsigned long end, phys_addr_t phys,
581  const struct mem_type *type)
582 {
583  pmd_t *pmd = pmd_offset(pud, addr);
584 
585  /*
586  * Try a section mapping - end, addr and phys must all be aligned
587  * to a section boundary. Note that PMDs refer to the individual
588  * L1 entries, whereas PGDs refer to a group of L1 entries making
589  * up one logical pointer to an L2 table.
590  */
591  if (type->prot_sect && ((addr | end | phys) & ~SECTION_MASK) == 0) {
592  pmd_t *p = pmd;
593 
594 #ifndef CONFIG_ARM_LPAE
595  if (addr & SECTION_SIZE)
596  pmd++;
597 #endif
598 
599  do {
600  *pmd = __pmd(phys | type->prot_sect);
601  phys += SECTION_SIZE;
602  } while (pmd++, addr += SECTION_SIZE, addr != end);
603 
604  flush_pmd_entry(p);
605  } else {
606  /*
607  * No need to loop; pte's aren't interested in the
608  * individual L1 entries.
609  */
610  alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
611  }
612 }
613 
614 static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
615  unsigned long end, unsigned long phys, const struct mem_type *type)
616 {
617  pud_t *pud = pud_offset(pgd, addr);
618  unsigned long next;
619 
620  do {
621  next = pud_addr_end(addr, end);
622  alloc_init_section(pud, addr, next, phys, type);
623  phys += next - addr;
624  } while (pud++, addr = next, addr != end);
625 }
626 
627 #ifndef CONFIG_ARM_LPAE
628 static void __init create_36bit_mapping(struct map_desc *md,
629  const struct mem_type *type)
630 {
631  unsigned long addr, length, end;
633  pgd_t *pgd;
634 
635  addr = md->virtual;
636  phys = __pfn_to_phys(md->pfn);
637  length = PAGE_ALIGN(md->length);
638 
639  if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
640  printk(KERN_ERR "MM: CPU does not support supersection "
641  "mapping for 0x%08llx at 0x%08lx\n",
642  (long long)__pfn_to_phys((u64)md->pfn), addr);
643  return;
644  }
645 
646  /* N.B. ARMv6 supersections are only defined to work with domain 0.
647  * Since domain assignments can in fact be arbitrary, the
648  * 'domain == 0' check below is required to insure that ARMv6
649  * supersections are only allocated for domain 0 regardless
650  * of the actual domain assignments in use.
651  */
652  if (type->domain) {
653  printk(KERN_ERR "MM: invalid domain in supersection "
654  "mapping for 0x%08llx at 0x%08lx\n",
655  (long long)__pfn_to_phys((u64)md->pfn), addr);
656  return;
657  }
658 
659  if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
660  printk(KERN_ERR "MM: cannot create mapping for 0x%08llx"
661  " at 0x%08lx invalid alignment\n",
662  (long long)__pfn_to_phys((u64)md->pfn), addr);
663  return;
664  }
665 
666  /*
667  * Shift bits [35:32] of address into bits [23:20] of PMD
668  * (See ARMv6 spec).
669  */
670  phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);
671 
672  pgd = pgd_offset_k(addr);
673  end = addr + length;
674  do {
675  pud_t *pud = pud_offset(pgd, addr);
676  pmd_t *pmd = pmd_offset(pud, addr);
677  int i;
678 
679  for (i = 0; i < 16; i++)
680  *pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);
681 
682  addr += SUPERSECTION_SIZE;
683  phys += SUPERSECTION_SIZE;
684  pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
685  } while (addr != end);
686 }
687 #endif /* !CONFIG_ARM_LPAE */
688 
689 /*
690  * Create the page directory entries and any necessary
691  * page tables for the mapping specified by `md'. We
692  * are able to cope here with varying sizes and address
693  * offsets, and we take full advantage of sections and
694  * supersections.
695  */
696 static void __init create_mapping(struct map_desc *md)
697 {
698  unsigned long addr, length, end;
700  const struct mem_type *type;
701  pgd_t *pgd;
702 
703  if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
704  printk(KERN_WARNING "BUG: not creating mapping for 0x%08llx"
705  " at 0x%08lx in user region\n",
706  (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
707  return;
708  }
709 
710  if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
711  md->virtual >= PAGE_OFFSET &&
712  (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
713  printk(KERN_WARNING "BUG: mapping for 0x%08llx"
714  " at 0x%08lx out of vmalloc space\n",
715  (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
716  }
717 
718  type = &mem_types[md->type];
719 
720 #ifndef CONFIG_ARM_LPAE
721  /*
722  * Catch 36-bit addresses
723  */
724  if (md->pfn >= 0x100000) {
725  create_36bit_mapping(md, type);
726  return;
727  }
728 #endif
729 
730  addr = md->virtual & PAGE_MASK;
731  phys = __pfn_to_phys(md->pfn);
732  length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
733 
734  if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
735  printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not "
736  "be mapped using pages, ignoring.\n",
737  (long long)__pfn_to_phys(md->pfn), addr);
738  return;
739  }
740 
741  pgd = pgd_offset_k(addr);
742  end = addr + length;
743  do {
744  unsigned long next = pgd_addr_end(addr, end);
745 
746  alloc_init_pud(pgd, addr, next, phys, type);
747 
748  phys += next - addr;
749  addr = next;
750  } while (pgd++, addr != end);
751 }
752 
753 /*
754  * Create the architecture specific mappings
755  */
756 void __init iotable_init(struct map_desc *io_desc, int nr)
757 {
758  struct map_desc *md;
759  struct vm_struct *vm;
760 
761  if (!nr)
762  return;
763 
764  vm = early_alloc_aligned(sizeof(*vm) * nr, __alignof__(*vm));
765 
766  for (md = io_desc; nr; md++, nr--) {
767  create_mapping(md);
768  vm->addr = (void *)(md->virtual & PAGE_MASK);
769  vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
770  vm->phys_addr = __pfn_to_phys(md->pfn);
771  vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
772  vm->flags |= VM_ARM_MTYPE(md->type);
773  vm->caller = iotable_init;
774  vm_area_add_early(vm++);
775  }
776 }
777 
778 void __init vm_reserve_area_early(unsigned long addr, unsigned long size,
779  void *caller)
780 {
781  struct vm_struct *vm;
782 
783  vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm));
784  vm->addr = (void *)addr;
785  vm->size = size;
786  vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING;
787  vm->caller = caller;
788  vm_area_add_early(vm);
789 }
790 
791 #ifndef CONFIG_ARM_LPAE
792 
793 /*
794  * The Linux PMD is made of two consecutive section entries covering 2MB
795  * (see definition in include/asm/pgtable-2level.h). However a call to
796  * create_mapping() may optimize static mappings by using individual
797  * 1MB section mappings. This leaves the actual PMD potentially half
798  * initialized if the top or bottom section entry isn't used, leaving it
799  * open to problems if a subsequent ioremap() or vmalloc() tries to use
800  * the virtual space left free by that unused section entry.
801  *
802  * Let's avoid the issue by inserting dummy vm entries covering the unused
803  * PMD halves once the static mappings are in place.
804  */
805 
806 static void __init pmd_empty_section_gap(unsigned long addr)
807 {
808  vm_reserve_area_early(addr, SECTION_SIZE, pmd_empty_section_gap);
809 }
810 
811 static void __init fill_pmd_gaps(void)
812 {
813  struct vm_struct *vm;
814  unsigned long addr, next = 0;
815  pmd_t *pmd;
816 
817  /* we're still single threaded hence no lock needed here */
818  for (vm = vmlist; vm; vm = vm->next) {
819  if (!(vm->flags & (VM_ARM_STATIC_MAPPING | VM_ARM_EMPTY_MAPPING)))
820  continue;
821  addr = (unsigned long)vm->addr;
822  if (addr < next)
823  continue;
824 
825  /*
826  * Check if this vm starts on an odd section boundary.
827  * If so and the first section entry for this PMD is free
828  * then we block the corresponding virtual address.
829  */
830  if ((addr & ~PMD_MASK) == SECTION_SIZE) {
831  pmd = pmd_off_k(addr);
832  if (pmd_none(*pmd))
833  pmd_empty_section_gap(addr & PMD_MASK);
834  }
835 
836  /*
837  * Then check if this vm ends on an odd section boundary.
838  * If so and the second section entry for this PMD is empty
839  * then we block the corresponding virtual address.
840  */
841  addr += vm->size;
842  if ((addr & ~PMD_MASK) == SECTION_SIZE) {
843  pmd = pmd_off_k(addr) + 1;
844  if (pmd_none(*pmd))
845  pmd_empty_section_gap(addr);
846  }
847 
848  /* no need to look at any vm entry until we hit the next PMD */
849  next = (addr + PMD_SIZE - 1) & PMD_MASK;
850  }
851 }
852 
853 #else
854 #define fill_pmd_gaps() do { } while (0)
855 #endif
856 
857 #if defined(CONFIG_PCI) && !defined(CONFIG_NEED_MACH_IO_H)
858 static void __init pci_reserve_io(void)
859 {
860  struct vm_struct *vm;
861  unsigned long addr;
862 
863  /* we're still single threaded hence no lock needed here */
864  for (vm = vmlist; vm; vm = vm->next) {
865  if (!(vm->flags & VM_ARM_STATIC_MAPPING))
866  continue;
867  addr = (unsigned long)vm->addr;
868  addr &= ~(SZ_2M - 1);
869  if (addr == PCI_IO_VIRT_BASE)
870  return;
871 
872  }
873  vm_reserve_area_early(PCI_IO_VIRT_BASE, SZ_2M, pci_reserve_io);
874 }
875 #else
876 #define pci_reserve_io() do { } while (0)
877 #endif
878 
/*
 * Lowest virtual address the vmalloc area may start at.  The default
 * leaves 240MB plus VMALLOC_OFFSET below VMALLOC_END; early_vmalloc()
 * replaces it when "vmalloc=" is given on the command line.
 */
879 static void * __initdata vmalloc_min =
880  (void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET);
881 
882 /*
883  * vmalloc=size forces the vmalloc area to be exactly 'size'
884  * bytes. This can be used to increase (or decrease) the vmalloc
885  * area - the default is 240m.
886  */
887 static int __init early_vmalloc(char *arg)
888 {
889  unsigned long vmalloc_reserve = memparse(arg, NULL);
890 
891  if (vmalloc_reserve < SZ_16M) {
892  vmalloc_reserve = SZ_16M;
894  "vmalloc area too small, limiting to %luMB\n",
895  vmalloc_reserve >> 20);
896  }
897 
898  if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
899  vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
901  "vmalloc area is too big, limiting to %luMB\n",
902  vmalloc_reserve >> 20);
903  }
904 
905  vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve);
906  return 0;
907 }
908 early_param("vmalloc", early_vmalloc);
909 
/*
 * Physical address one past the end of low memory.  Raised while the
 * memory banks are scanned (highest start + size of any non-highmem bank)
 * and then used by prepare_page_table() and map_lowmem() as the upper
 * bound for lowmem mappings.
 */
910 phys_addr_t arm_lowmem_limit __initdata = 0;
911 
913 {
914  int i, j, highmem = 0;
915 
916  for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
917  struct membank *bank = &meminfo.bank[j];
918  *bank = meminfo.bank[i];
919 
920  if (bank->start > ULONG_MAX)
921  highmem = 1;
922 
923 #ifdef CONFIG_HIGHMEM
924  if (__va(bank->start) >= vmalloc_min ||
925  __va(bank->start) < (void *)PAGE_OFFSET)
926  highmem = 1;
927 
928  bank->highmem = highmem;
929 
930  /*
931  * Split those memory banks which are partially overlapping
932  * the vmalloc area greatly simplifying things later.
933  */
934  if (!highmem && __va(bank->start) < vmalloc_min &&
935  bank->size > vmalloc_min - __va(bank->start)) {
936  if (meminfo.nr_banks >= NR_BANKS) {
937  printk(KERN_CRIT "NR_BANKS too low, "
938  "ignoring high memory\n");
939  } else {
940  memmove(bank + 1, bank,
941  (meminfo.nr_banks - i) * sizeof(*bank));
942  meminfo.nr_banks++;
943  i++;
944  bank[1].size -= vmalloc_min - __va(bank->start);
945  bank[1].start = __pa(vmalloc_min - 1) + 1;
946  bank[1].highmem = highmem = 1;
947  j++;
948  }
949  bank->size = vmalloc_min - __va(bank->start);
950  }
951 #else
952  bank->highmem = highmem;
953 
954  /*
955  * Highmem banks not allowed with !CONFIG_HIGHMEM.
956  */
957  if (highmem) {
958  printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
959  "(!CONFIG_HIGHMEM).\n",
960  (unsigned long long)bank->start,
961  (unsigned long long)bank->start + bank->size - 1);
962  continue;
963  }
964 
965  /*
966  * Check whether this memory bank would entirely overlap
967  * the vmalloc area.
968  */
969  if (__va(bank->start) >= vmalloc_min ||
970  __va(bank->start) < (void *)PAGE_OFFSET) {
971  printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
972  "(vmalloc region overlap).\n",
973  (unsigned long long)bank->start,
974  (unsigned long long)bank->start + bank->size - 1);
975  continue;
976  }
977 
978  /*
979  * Check whether this memory bank would partially overlap
980  * the vmalloc area.
981  */
982  if (__va(bank->start + bank->size - 1) >= vmalloc_min ||
983  __va(bank->start + bank->size - 1) <= __va(bank->start)) {
984  unsigned long newsize = vmalloc_min - __va(bank->start);
985  printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
986  "to -%.8llx (vmalloc region overlap).\n",
987  (unsigned long long)bank->start,
988  (unsigned long long)bank->start + bank->size - 1,
989  (unsigned long long)bank->start + newsize - 1);
990  bank->size = newsize;
991  }
992 #endif
993  if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit)
994  arm_lowmem_limit = bank->start + bank->size;
995 
996  j++;
997  }
998 #ifdef CONFIG_HIGHMEM
999  if (highmem) {
1000  const char *reason = NULL;
1001 
1002  if (cache_is_vipt_aliasing()) {
1003  /*
1004  * Interactions between kmap and other mappings
1005  * make highmem support with aliasing VIPT caches
1006  * rather difficult.
1007  */
1008  reason = "with VIPT aliasing cache";
1009  }
1010  if (reason) {
1011  printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
1012  reason);
1013  while (j > 0 && meminfo.bank[j - 1].highmem)
1014  j--;
1015  }
1016  }
1017 #endif
1018  meminfo.nr_banks = j;
1019  high_memory = __va(arm_lowmem_limit - 1) + 1;
1021 }
1022 
1023 static inline void prepare_page_table(void)
1024 {
1025  unsigned long addr;
1026  phys_addr_t end;
1027 
1028  /*
1029  * Clear out all the mappings below the kernel image.
1030  */
1031  for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
1032  pmd_clear(pmd_off_k(addr));
1033 
1034 #ifdef CONFIG_XIP_KERNEL
1035  /* The XIP kernel is mapped in the module area -- skip over it */
1036  addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK;
1037 #endif
1038  for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE)
1039  pmd_clear(pmd_off_k(addr));
1040 
1041  /*
1042  * Find the end of the first block of lowmem.
1043  */
1044  end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
1045  if (end >= arm_lowmem_limit)
1046  end = arm_lowmem_limit;
1047 
1048  /*
1049  * Clear out all the kernel space mappings, except for the first
1050  * memory bank, up to the vmalloc region.
1051  */
1052  for (addr = __phys_to_virt(end);
1053  addr < VMALLOC_START; addr += PMD_SIZE)
1054  pmd_clear(pmd_off_k(addr));
1055 }
1056 
1057 #ifdef CONFIG_ARM_LPAE
1058 /* the first page is reserved for pgd */
1059 #define SWAPPER_PG_DIR_SIZE (PAGE_SIZE + \
1060  PTRS_PER_PGD * PTRS_PER_PMD * sizeof(pmd_t))
1061 #else
1062 #define SWAPPER_PG_DIR_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
1063 #endif
1064 
1065 /*
1066  * Reserve the special regions of memory
1067  */
1069 {
1070  /*
1071  * Reserve the page tables. These are already in use,
1072  * and can only be in node 0.
1073  */
1075 
1076 #ifdef CONFIG_SA1111
1077  /*
1078  * Because of the SA1111 DMA bug, we want to preserve our
1079  * precious DMA-able memory...
1080  */
1082 #endif
1083 }
1084 
1085 /*
1086  * Set up the device mappings. Since we clear out the page tables for all
1087  * mappings above VMALLOC_START, we will remove any debug device mappings.
1088  * This means you have to be careful how you debug this function, or any
1089  * called function. This means you can't use any function or debugging
1090  * method which may touch any device, otherwise the kernel _will_ crash.
1091  */
/*
 * devicemaps_init() - build every mapping that lives above lowmem.
 *
 * Visible steps: allocate and initialise the vectors page, clear all
 * first-level entries from VMALLOC_START to the top of the address space,
 * map the XIP kernel and cache-flush regions when configured, map the
 * vectors page, let the machine description map its static devices, then
 * reserve PMD gaps and the PCI I/O window.
 *
 * @mdesc: machine description; only its map_io hook is used here.
 *
 * NOTE(review): the listing's line numbering has gaps inside this
 * function (1123->1125, 1126->1128, 1130->1132, 1145->1147, 1150->1152,
 * 1170->1172); assignments to map.pfn / map.type and one call before
 * flush_cache_all() appear to be missing from this copy.
 */
1092 static void __init devicemaps_init(struct machine_desc *mdesc)
1093 {
1094  struct map_desc map;
1095  unsigned long addr;
1096  void *vectors;
1097 
1098  /*
1099  * Allocate the vector page early.
1100  */
1101  vectors = early_alloc(PAGE_SIZE);
1102 
1103  early_trap_init(vectors);
1104 
 /* The loop ends when addr wraps around to 0, i.e. at the top of memory. */
1105  for (addr = VMALLOC_START; addr; addr += PMD_SIZE)
1106  pmd_clear(pmd_off_k(addr));
1107 
1108  /*
1109  * Map the kernel if it is XIP.
1110  * It is always first in the modulearea.
1111  */
1112 #ifdef CONFIG_XIP_KERNEL
1113  map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
1114  map.virtual = MODULES_VADDR;
 /* Length is the image size rounded up to a whole number of sections. */
1115  map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
1116  map.type = MT_ROM;
1117  create_mapping(&map);
1118 #endif
1119 
1120  /*
1121  * Map the cache flushing regions.
1122  */
1123 #ifdef FLUSH_BASE
 /* NOTE(review): map.pfn and map.type are not assigned in the visible lines. */
1125  map.virtual = FLUSH_BASE;
1126  map.length = SZ_1M;
1128  create_mapping(&map);
1129 #endif
1130 #ifdef FLUSH_BASE_MINICACHE
 /* NOTE(review): map.pfn is not assigned in the visible lines. */
1132  map.virtual = FLUSH_BASE_MINICACHE;
1133  map.length = SZ_1M;
1134  map.type = MT_MINICLEAN;
1135  create_mapping(&map);
1136 #endif
1137 
1138  /*
1139  * Create a mapping for the machine vectors at the high-vectors
1140  * location (0xffff0000). If we aren't using high-vectors, also
1141  * create a mapping at the low-vectors virtual address.
1142  */
1143  map.pfn = __phys_to_pfn(virt_to_phys(vectors));
1144  map.virtual = 0xffff0000;
1145  map.length = PAGE_SIZE;
 /* NOTE(review): map.type is not assigned in the visible lines. */
1147  create_mapping(&map);
1148 
1149  if (!vectors_high()) {
 /* Same page frame and length as above, mapped again at address 0. */
1150  map.virtual = 0;
1152  create_mapping(&map);
1153  }
1154 
1155  /*
1156  * Ask the machine support to map in the statically mapped devices.
1157  */
1158  if (mdesc->map_io)
1159  mdesc->map_io();
1160  fill_pmd_gaps();
1161 
1162  /* Reserve fixed i/o space in VMALLOC region */
1163  pci_reserve_io();
1164 
1165  /*
1166  * Finally flush the caches and tlb to ensure that we're in a
1167  * consistent state wrt the writebuffer. This also ensures that
1168  * any write-allocated cache lines in the vector page are written
1169  * back. After this point, we can start to touch devices again.
1170  */
1172  flush_cache_all();
1173 }
1174 
/*
 * kmap_init() - with CONFIG_HIGHMEM, set pkmap_page_table to the PTE slot
 * that early_pte_alloc() returns for the PMD covering PKMAP_BASE (the
 * page table is created if that PMD is still empty).  Without
 * CONFIG_HIGHMEM the function is empty.
 *
 * NOTE(review): the early_pte_alloc() call below is cut off after its
 * first argument; the line carrying the remaining arguments (listing
 * line 1179) is missing from this copy.
 */
1175 static void __init kmap_init(void)
1176 {
1177 #ifdef CONFIG_HIGHMEM
1178  pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
1180 #endif
1181 }
1182 
1183 static void __init map_lowmem(void)
1184 {
1185  struct memblock_region *reg;
1186 
1187  /* Map all the lowmem memory banks. */
1188  for_each_memblock(memory, reg) {
1189  phys_addr_t start = reg->base;
1190  phys_addr_t end = start + reg->size;
1191  struct map_desc map;
1192 
1193  if (end > arm_lowmem_limit)
1194  end = arm_lowmem_limit;
1195  if (start >= end)
1196  break;
1197 
1198  map.pfn = __phys_to_pfn(start);
1199  map.virtual = __phys_to_virt(start);
1200  map.length = end - start;
1201  map.type = MT_MEMORY;
1202 
1203  create_mapping(&map);
1204  }
1205 }
1206 
1207 /*
1208  * paging_init() sets up the page tables, initialises the zone memory
1209  * maps, and sets up the zero page, bad page and bad page tables.
1210  */
/*
 * @mdesc: machine description, handed on to devicemaps_init().
 *
 * NOTE(review): listing lines 1215 and 1220 are missing from this copy,
 * so one call before build_mem_type_table() and one after map_lowmem()
 * are not visible here.
 */
1211 void __init paging_init(struct machine_desc *mdesc)
1212 {
1213  void *zero_page;
1214 
1216 
 /* Order matters: the type table must exist before any mapping is created. */
1217  build_mem_type_table();
1218  prepare_page_table();
1219  map_lowmem();
1221  devicemaps_init(mdesc);
1222  kmap_init();
1223 
 /* Remember the first-level entry covering the high vectors address. */
1224  top_pmd = pmd_off_k(0xffff0000);
1225 
1226  /* allocate the zero page. */
1227  zero_page = early_alloc(PAGE_SIZE);
1228 
1229  bootmem_init();
1230 
 /* virt_to_page() is applied only after bootmem_init() has run. */
1231  empty_zero_page = virt_to_page(zero_page);
1232  __flush_dcache_page(NULL, empty_zero_page);
1233 }