Linux Kernel 3.7.1
hugetlbpage.c
/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 *
 * TILE Huge TLB Page Support for Kernel.
 * Taken from i386 hugetlb implementation:
 * Copyright (C) 2002, Rohit Seth <[email protected]>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <linux/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/setup.h>

#ifdef CONFIG_HUGETLB_SUPER_PAGES

/*
 * Provide an additional huge page size (in addition to the regular default
 * huge page size) if no "hugepagesz" arguments are specified.
 * Note that it must be smaller than the default huge page size so
 * that it's possible to allocate them on demand from the buddy allocator.
 * You can change this to 64K (on a 16K build), 256K, 1M, or 4M,
 * or not define it at all.
 */
#define ADDITIONAL_HUGE_SIZE (1024 * 1024UL)

/* "Extra" page-size multipliers, one per level of the page table. */
int huge_shift[HUGE_SHIFT_ENTRIES] = {
#ifdef ADDITIONAL_HUGE_SIZE
#define ADDITIONAL_HUGE_SHIFT __builtin_ctzl(ADDITIONAL_HUGE_SIZE / PAGE_SIZE)
	[HUGE_SHIFT_PAGE] = ADDITIONAL_HUGE_SHIFT
#endif
};
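
/*
 * Worked example (assuming a 64 KB PAGE_SIZE build; other page sizes are
 * possible): with ADDITIONAL_HUGE_SIZE of 1 MB, ADDITIONAL_HUGE_SHIFT is
 * __builtin_ctzl(1 MB / 64 KB) = __builtin_ctzl(16) = 4, so
 * huge_shift[HUGE_SHIFT_PAGE] = 4 and each such huge page spans
 * 2^4 = 16 consecutive L2 PTE slots.
 */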

/*
 * This routine is a hybrid of pte_alloc_map() and pte_alloc_kernel().
 * It assumes that L2 PTEs are never in HIGHMEM (we don't support that).
 * It locks the user pagetable, and bumps up the mm->nr_ptes field,
 * but otherwise allocates the page table using the kernel versions.
 */
static pte_t *pte_alloc_hugetlb(struct mm_struct *mm, pmd_t *pmd,
				unsigned long address)
{
	pte_t *new;

	if (pmd_none(*pmd)) {
		new = pte_alloc_one_kernel(mm, address);
		if (!new)
			return NULL;

		smp_wmb(); /* See comment in __pte_alloc */

		spin_lock(&mm->page_table_lock);
		if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
			mm->nr_ptes++;
			pmd_populate_kernel(mm, pmd, new);
			new = NULL;
		} else
			VM_BUG_ON(pmd_trans_splitting(*pmd));
		spin_unlock(&mm->page_table_lock);
		if (new)
			pte_free_kernel(mm, new);
	}

	return pte_offset_kernel(pmd, address);
}
#endif

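/*
 * Allocate (if necessary) and return a pointer to the page-table entry
 * at the level implied by "sz": the PUD level for PGDIR-sized pages, the
 * PMD level for the default huge page size, or an L2 PTE for the smaller
 * "super" pages enabled via huge_shift[HUGE_SHIFT_PAGE].
 */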
pte_t *huge_pte_alloc(struct mm_struct *mm,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	pud_t *pud;

	addr &= -sz;   /* Mask off any low bits in the address. */

	pgd = pgd_offset(mm, addr);
	pud = pud_alloc(mm, pgd, addr);

#ifdef CONFIG_HUGETLB_SUPER_PAGES
	if (sz >= PGDIR_SIZE) {
		BUG_ON(sz != PGDIR_SIZE &&
		       sz != PGDIR_SIZE << huge_shift[HUGE_SHIFT_PGDIR]);
		return (pte_t *)pud;
	} else {
		pmd_t *pmd = pmd_alloc(mm, pud, addr);
		if (sz >= PMD_SIZE) {
			BUG_ON(sz != PMD_SIZE &&
			       sz != (PMD_SIZE << huge_shift[HUGE_SHIFT_PMD]));
			return (pte_t *)pmd;
		} else {
			if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE])
				panic("Unexpected page size %#lx\n", sz);
			return pte_alloc_hugetlb(mm, pmd, addr);
		}
	}
#else
	BUG_ON(sz != PMD_SIZE);
	return (pte_t *) pmd_alloc(mm, pud, addr);
#endif
}

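/*
 * Return the PTE at "index" within the page-table page at "base", falling
 * back to a "super" PTE: if that entry is not present but huge_shift[level]
 * is non-zero, the entry at the index rounded down to the super-page
 * boundary (index & -(1UL << huge_shift[level])) may be a present PTE with
 * the super bit set, in which case it covers this index and is returned
 * instead.
 */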
static pte_t *get_pte(pte_t *base, int index, int level)
{
	pte_t *ptep = base + index;
#ifdef CONFIG_HUGETLB_SUPER_PAGES
	if (!pte_present(*ptep) && huge_shift[level] != 0) {
		unsigned long mask = -1UL << huge_shift[level];
		pte_t *super_ptep = base + (index & mask);
		pte_t pte = *super_ptep;
		if (pte_present(pte) && pte_super(pte))
			ptep = super_ptep;
	}
#endif
	return ptep;
}

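/*
 * Walk the page tables and return a pointer to the huge PTE mapping
 * "addr", or NULL if there is none: the L0 (PGD/PUD) entry is checked
 * first, then the L1 (PMD) entry, and finally, with
 * CONFIG_HUGETLB_SUPER_PAGES, an L2 "super" PTE.
 */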
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
#ifdef CONFIG_HUGETLB_SUPER_PAGES
	pte_t *pte;
#endif

	/* Get the top-level page table entry. */
	pgd = (pgd_t *)get_pte((pte_t *)mm->pgd, pgd_index(addr), 0);
	if (!pgd_present(*pgd))
		return NULL;

	/* We don't have four levels. */
	pud = pud_offset(pgd, addr);
#ifndef __PAGETABLE_PUD_FOLDED
# error support fourth page table level
#endif

	/* Check for an L0 huge PTE, if we have three levels. */
#ifndef __PAGETABLE_PMD_FOLDED
	if (pud_huge(*pud))
		return (pte_t *)pud;

	pmd = (pmd_t *)get_pte((pte_t *)pud_page_vaddr(*pud),
			       pmd_index(addr), 1);
	if (!pmd_present(*pmd))
		return NULL;
#else
	pmd = pmd_offset(pud, addr);
#endif

	/* Check for an L1 huge PTE. */
	if (pmd_huge(*pmd))
		return (pte_t *)pmd;

#ifdef CONFIG_HUGETLB_SUPER_PAGES
	/* Check for an L2 huge PTE. */
	pte = get_pte((pte_t *)pmd_page_vaddr(*pmd), pte_index(addr), 2);
	if (!pte_present(*pte))
		return NULL;
	if (pte_super(*pte))
		return pte;
#endif

	return NULL;
}

struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
			      int write)
{
	return ERR_PTR(-EINVAL);
}

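/*
 * A PMD or PUD entry maps a huge page iff its _PAGE_HUGE_PAGE bit is set;
 * these helpers just test that bit.
 */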
int pmd_huge(pmd_t pmd)
{
	return !!(pmd_val(pmd) & _PAGE_HUGE_PAGE);
}

int pud_huge(pud_t pud)
{
	return !!(pud_val(pud) & _PAGE_HUGE_PAGE);
}

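/*
 * Given a huge PMD (or PUD) entry, return the struct page for the base
 * page containing "address": start from the huge page's head page and
 * add the base-page offset within the huge page.
 */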
struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
			     pmd_t *pmd, int write)
{
	struct page *page;

	page = pte_page(*(pte_t *)pmd);
	if (page)
		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
	return page;
}

struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
			     pud_t *pud, int write)
{
	struct page *page;

	page = pte_page(*(pte_t *)pud);
	if (page)
		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
	return page;
}

int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	return 0;
}

#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
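/*
 * Bottom-up search: starting from the cached free-area hint (or
 * TASK_UNMAPPED_BASE), walk the VMA list looking for a gap of at least
 * "len" bytes aligned to the huge page size, restarting once from
 * TASK_UNMAPPED_BASE before failing with -ENOMEM.
 */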
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
		unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len > mm->cached_hole_size) {
		start_addr = mm->free_area_cache;
	} else {
		start_addr = TASK_UNMAPPED_BASE;
		mm->cached_hole_size = 0;
	}

full_search:
	addr = ALIGN(start_addr, huge_page_size(h));

	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point: (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr) {
			/*
			 * Start a new search - just in case we missed
			 * some holes.
			 */
			if (start_addr != TASK_UNMAPPED_BASE) {
				start_addr = TASK_UNMAPPED_BASE;
				mm->cached_hole_size = 0;
				goto full_search;
			}
			return -ENOMEM;
		}
		if (!vma || addr + len <= vma->vm_start) {
			mm->free_area_cache = addr + len;
			return addr;
		}
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;
		addr = ALIGN(vma->vm_end, huge_page_size(h));
	}
}

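/*
 * Top-down search: start just below the cached free-area hint (clamped
 * to the mmap base) and walk downward through the VMAs looking for a
 * sufficiently large hole; if the hint leaves no room, retry once from
 * the base, and as a last resort fall back to the bottom-up search above.
 */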
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
		unsigned long addr0, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev_vma;
	unsigned long base = mm->mmap_base, addr = addr0;
	unsigned long largest_hole = mm->cached_hole_size;
	int first_time = 1;

	/* don't allow allocations above current base */
	if (mm->free_area_cache > base)
		mm->free_area_cache = base;

	if (len <= largest_hole) {
		largest_hole = 0;
		mm->free_area_cache = base;
	}
try_again:
	/* make sure it can fit in the remaining address space */
	if (mm->free_area_cache < len)
		goto fail;

	/* either no address requested or can't fit in requested address hole */
	addr = (mm->free_area_cache - len) & huge_page_mask(h);
	do {
		/*
		 * Lookup failure means no vma is above this address,
		 * i.e. return with success:
		 */
		vma = find_vma_prev(mm, addr, &prev_vma);
		if (!vma) {
			return addr;
			break;
		}

		/*
		 * new region fits between prev_vma->vm_end and
		 * vma->vm_start, use it:
		 */
		if (addr + len <= vma->vm_start &&
		    (!prev_vma || (addr >= prev_vma->vm_end))) {
			/* remember the address as a hint for next time */
			mm->cached_hole_size = largest_hole;
			mm->free_area_cache = addr;
			return addr;
		} else {
			/* pull free_area_cache down to the first hole */
			if (mm->free_area_cache == vma->vm_end) {
				mm->free_area_cache = vma->vm_start;
				mm->cached_hole_size = largest_hole;
			}
		}

		/* remember the largest hole we saw so far */
		if (addr + largest_hole < vma->vm_start)
			largest_hole = vma->vm_start - addr;

		/* try just below the current vma->vm_start */
		addr = (vma->vm_start - len) & huge_page_mask(h);

	} while (len <= vma->vm_start);

fail:
	/*
	 * if hint left us with no space for the requested
	 * mapping then try again:
	 */
	if (first_time) {
		mm->free_area_cache = base;
		largest_hole = 0;
		first_time = 0;
		goto try_again;
	}
	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	mm->cached_hole_size = ~0UL;
	addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
			len, pgoff, flags);

	/*
	 * Restore the topdown base:
	 */
	mm->free_area_cache = base;
	mm->cached_hole_size = ~0UL;

	return addr;
}

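/*
 * Arch hook for placing a hugetlb mapping: validate alignment and size,
 * honor MAP_FIXED and any address hint, then dispatch to the bottom-up
 * or top-down search to match the mm's normal mmap layout.
 */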
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;

	if (len & ~huge_page_mask(h))
		return -EINVAL;
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(file, addr, len))
			return -EINVAL;
		return addr;
	}

	if (addr) {
		addr = ALIGN(addr, huge_page_size(h));
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}
	if (current->mm->get_unmapped_area == arch_get_unmapped_area)
		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
				pgoff, flags);
	else
		return hugetlb_get_unmapped_area_topdown(file, addr, len,
				pgoff, flags);
}
#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */

#ifdef CONFIG_HUGETLB_SUPER_PAGES
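/*
 * Validate a requested huge page size and register it: the size must be
 * a power of four and no larger than 64 GB, and is assigned to a
 * page-table level (PUD, PMD, or L2 PTE) by magnitude.  A size that is
 * not the native size for its level also requires hypervisor support,
 * enabled via hv_set_pte_super_shift().
 */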
static __init int __setup_hugepagesz(unsigned long ps)
{
	int log_ps = __builtin_ctzl(ps);
	int level, base_shift;

	if ((1UL << log_ps) != ps || (log_ps & 1) != 0) {
		pr_warn("Not enabling %ld byte huge pages;"
			" must be a power of four.\n", ps);
		return -EINVAL;
	}

	if (ps > 64*1024*1024*1024UL) {
		pr_warn("Not enabling %ld MB huge pages;"
			" largest legal value is 64 GB .\n", ps >> 20);
		return -EINVAL;
	} else if (ps >= PUD_SIZE) {
		static long hv_jpage_size;
		if (hv_jpage_size == 0)
			hv_jpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO);
		if (hv_jpage_size != PUD_SIZE) {
			pr_warn("Not enabling >= %ld MB huge pages:"
				" hypervisor reports size %ld\n",
				PUD_SIZE >> 20, hv_jpage_size);
			return -EINVAL;
		}
		level = 0;
		base_shift = PUD_SHIFT;
	} else if (ps >= PMD_SIZE) {
		level = 1;
		base_shift = PMD_SHIFT;
	} else if (ps > PAGE_SIZE) {
		level = 2;
		base_shift = PAGE_SHIFT;
	} else {
		pr_err("hugepagesz: huge page size %ld too small\n", ps);
		return -EINVAL;
	}

	if (log_ps != base_shift) {
		int shift_val = log_ps - base_shift;
		if (huge_shift[level] != 0) {
			int old_shift = base_shift + huge_shift[level];
			pr_warn("Not enabling %ld MB huge pages;"
				" already have size %ld MB.\n",
				ps >> 20, (1UL << old_shift) >> 20);
			return -EINVAL;
		}
		if (hv_set_pte_super_shift(level, shift_val) != 0) {
			pr_warn("Not enabling %ld MB huge pages;"
				" no hypervisor support.\n", ps >> 20);
			return -EINVAL;
		}
		printk(KERN_DEBUG "Enabled %ld MB huge pages\n", ps >> 20);
		huge_shift[level] = shift_val;
	}

	hugetlb_add_hstate(log_ps - PAGE_SHIFT);

	return 0;
}

static bool saw_hugepagesz;

static __init int setup_hugepagesz(char *opt)
{
	if (!saw_hugepagesz) {
		saw_hugepagesz = true;
		memset(huge_shift, 0, sizeof(huge_shift));
	}
	return __setup_hugepagesz(memparse(opt, NULL));
}
__setup("hugepagesz=", setup_hugepagesz);
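
/*
 * Usage note (illustrative): booting with, e.g., "hugepagesz=<size>
 * hugepages=<n>" on the kernel command line routes <size> through
 * setup_hugepagesz() above, and the generic hugetlb code then reserves
 * <n> pages of that size.  Only sizes accepted by __setup_hugepagesz()
 * (powers of four, at most 64 GB) are enabled.
 */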

#ifdef ADDITIONAL_HUGE_SIZE
/*
 * Provide an additional huge page size if no "hugepagesz" args are given.
 * In that case, all the cores have properly set up their hv super_shift
 * already, but we need to notify the hugetlb code to enable the
 * new huge page size from the Linux point of view.
 */
static __init int add_default_hugepagesz(void)
{
	if (!saw_hugepagesz) {
		BUILD_BUG_ON(ADDITIONAL_HUGE_SIZE >= PMD_SIZE ||
			     ADDITIONAL_HUGE_SIZE <= PAGE_SIZE);
		BUILD_BUG_ON((PAGE_SIZE << ADDITIONAL_HUGE_SHIFT) !=
			     ADDITIONAL_HUGE_SIZE);
		BUILD_BUG_ON(ADDITIONAL_HUGE_SHIFT & 1);
		hugetlb_add_hstate(ADDITIONAL_HUGE_SHIFT);
	}
	return 0;
}
arch_initcall(add_default_hugepagesz);
#endif

#endif /* CONFIG_HUGETLB_SUPER_PAGES */