Linux Kernel  3.7.1
tsb.c
/* arch/sparc64/mm/tsb.c
 *
 * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tsb.h>
#include <asm/oplib.h>

extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

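/* Illustrative note: tsb_hash() folds a virtual address into a table slot,
 *
 *     index = (vaddr >> hash_shift) & (nentries - 1),
 *
 * so, assuming the 8K base pages used on sparc64 (PAGE_SHIFT == 13) and the
 * smallest 8K TSB (8192 / sizeof(struct tsb) == 512 entries),
 * tsb_hash(0x2000, PAGE_SHIFT, 512) == 1 and consecutive virtual pages fill
 * consecutive slots, wrapping every 512 pages.  tag_compare() matches an
 * entry by the high bits of the address (vaddr >> 22).
 */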
static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
{
        vaddr >>= hash_shift;
        return vaddr & (nentries - 1);
}

static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
        return (tag == (vaddr >> 22));
}

/* TSB flushes need only occur on the processor initiating the address
 * space modification, not on each cpu the address space has run on.
 * Only the TLB flush needs that treatment.
 */

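/* Invalidate any kernel TSB entry covering [start, end): a matching entry
 * simply has the invalid bit written into its tag, so the next TLB miss on
 * that address reloads from the kernel page tables.
 */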
void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{
        unsigned long v;

        for (v = start; v < end; v += PAGE_SIZE) {
                unsigned long hash = tsb_hash(v, PAGE_SHIFT,
                                              KERNEL_TSB_NENTRIES);
                struct tsb *ent = &swapper_tsb[hash];

                if (tag_compare(ent->tag, v))
                        ent->tag = (1UL << TSB_TAG_INVALID_BIT);
        }
}

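/* Flush one batch of user virtual addresses from a single TSB.  Each address
 * is hashed to its slot, the slot's byte offset in the table is computed as
 * hash * sizeof(struct tsb), and tsb_flush() invalidates the entry if its
 * tag still matches vaddr >> 22.  The low bit of a batched address is masked
 * off first; the TLB batching code uses it as a flag, not as part of the
 * address.
 */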
static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
                            unsigned long tsb, unsigned long nentries)
{
        unsigned long i;

        for (i = 0; i < tb->tlb_nr; i++) {
                unsigned long v = tb->vaddrs[i];
                unsigned long tag, ent, hash;

                v &= ~0x1UL;

                hash = tsb_hash(v, hash_shift, nentries);
                ent = tsb + (hash * sizeof(struct tsb));
                tag = (v >> 22UL);

                tsb_flush(ent, tag);
        }
}

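/* Flush a batch of user addresses from this address space's TSBs: always the
 * base (8K page) TSB and, when huge pages are in use, the huge-page TSB as
 * well.  On cheetah_plus and sun4v (hypervisor) the TSB is referenced by its
 * physical address, hence the __pa() conversion.
 */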
void flush_tsb_user(struct tlb_batch *tb)
{
        struct mm_struct *mm = tb->mm;
        unsigned long nentries, base, flags;

        spin_lock_irqsave(&mm->context.lock, flags);

        base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
        nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
        if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                base = __pa(base);
        __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
                base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
                nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
                if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                        base = __pa(base);
                __flush_tsb_one(tb, HPAGE_SHIFT, base, nentries);
        }
#endif
        spin_unlock_irqrestore(&mm->context.lock, flags);
}

#define HV_PGSZ_IDX_BASE        HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE       HV_PGSZ_MASK_8K

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HV_PGSZ_IDX_HUGE        HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE       HV_PGSZ_MASK_4MB
#endif

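/* Compute the TSB register value (and, on sun4v, the hypervisor TSB
 * descriptor) for the TSB selected by tsb_idx/tsb_bytes.  The low 3 bits of
 * the register encode the size (0x0 for 8K up to 0x7 for 1MB); the table is
 * mapped with the smallest page size that covers it in one TLB entry, e.g.
 * a 64K TSB with a 64K page and a 1MB TSB with a 4MB page.  On cheetah_plus
 * and sun4v the hardware takes a physical address, so no locked TLB mapping
 * is needed; older chips get a virtual mapping at TSBMAP_BASE backed by a
 * locked entry described by the tte built below.
 */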
static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
{
        unsigned long tsb_reg, base, tsb_paddr;
        unsigned long page_sz, tte;

        mm->context.tsb_block[tsb_idx].tsb_nentries =
                tsb_bytes / sizeof(struct tsb);

        base = TSBMAP_BASE;
        tte = pgprot_val(PAGE_KERNEL_LOCKED);
        tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
        BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

        /* Use the smallest page size that can map the whole TSB
         * in one TLB entry.
         */
        switch (tsb_bytes) {
        case 8192 << 0:
                tsb_reg = 0x0UL;
#ifdef DCACHE_ALIASING_POSSIBLE
                base += (tsb_paddr & 8192);
#endif
                page_sz = 8192;
                break;

        case 8192 << 1:
                tsb_reg = 0x1UL;
                page_sz = 64 * 1024;
                break;

        case 8192 << 2:
                tsb_reg = 0x2UL;
                page_sz = 64 * 1024;
                break;

        case 8192 << 3:
                tsb_reg = 0x3UL;
                page_sz = 64 * 1024;
                break;

        case 8192 << 4:
                tsb_reg = 0x4UL;
                page_sz = 512 * 1024;
                break;

        case 8192 << 5:
                tsb_reg = 0x5UL;
                page_sz = 512 * 1024;
                break;

        case 8192 << 6:
                tsb_reg = 0x6UL;
                page_sz = 512 * 1024;
                break;

        case 8192 << 7:
                tsb_reg = 0x7UL;
                page_sz = 4 * 1024 * 1024;
                break;

        default:
                printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
                       current->comm, current->pid, tsb_bytes);
                do_exit(SIGSEGV);
        }
        tte |= pte_sz_bits(page_sz);

        if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
                /* Physical mapping, no locked TLB entry for TSB. */
                tsb_reg |= tsb_paddr;

                mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
                mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
                mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
        } else {
                tsb_reg |= base;
                tsb_reg |= (tsb_paddr & (page_sz - 1UL));
                tte |= (tsb_paddr & ~(page_sz - 1UL));

                mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
                mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
                mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
        }

        /* Setup the Hypervisor TSB descriptor. */
        if (tlb_type == hypervisor) {
                struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];

                switch (tsb_idx) {
                case MM_TSB_BASE:
                        hp->pgsz_idx = HV_PGSZ_IDX_BASE;
                        break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
                case MM_TSB_HUGE:
                        hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
                        break;
#endif
                default:
                        BUG();
                }
                hp->assoc = 1;
                hp->num_ttes = tsb_bytes / 16;
                hp->ctx_idx = 0;
                switch (tsb_idx) {
                case MM_TSB_BASE:
                        hp->pgsz_mask = HV_PGSZ_MASK_BASE;
                        break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
                case MM_TSB_HUGE:
                        hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
                        break;
#endif
                default:
                        BUG();
                }
                hp->tsb_base = tsb_paddr;
                hp->resv = 0;
        }
}

struct kmem_cache *pgtable_cache __read_mostly;

static struct kmem_cache *tsb_caches[8] __read_mostly;

static const char *tsb_cache_names[8] = {
        "tsb_8KB",
        "tsb_16KB",
        "tsb_32KB",
        "tsb_64KB",
        "tsb_128KB",
        "tsb_256KB",
        "tsb_512KB",
        "tsb_1MB",
};

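/* Boot-time cache setup: one kmem cache for page-table pages and one per
 * possible TSB size, 8K through 1MB.  Each TSB cache is created with its
 * alignment equal to its size, which is what lets setup_tsb_params() map a
 * TSB with a single TLB entry and assert BUG_ON(tsb_paddr & (tsb_bytes - 1)).
 */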
void __init pgtable_cache_init(void)
{
        unsigned long i;

        pgtable_cache = kmem_cache_create("pgtable_cache",
                                          PAGE_SIZE, PAGE_SIZE,
                                          0,
                                          _clear_page);
        if (!pgtable_cache) {
                prom_printf("pgtable_cache_init(): Could not create!\n");
                prom_halt();
        }

        for (i = 0; i < 8; i++) {
                unsigned long size = 8192 << i;
                const char *name = tsb_cache_names[i];

                tsb_caches[i] = kmem_cache_create(name,
                                                  size, size,
                                                  0, NULL);
                if (!tsb_caches[i]) {
                        prom_printf("Could not create %s cache\n", name);
                        prom_halt();
                }
        }
}

int sysctl_tsb_ratio = -2;

static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
{
        unsigned long num_ents = (new_size / sizeof(struct tsb));

        if (sysctl_tsb_ratio < 0)
                return num_ents - (num_ents >> -sysctl_tsb_ratio);
        else
                return num_ents + (num_ents >> sysctl_tsb_ratio);
}
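/* Worked example (assuming the default ratio of -2 above): a negative ratio
 * gives an RSS limit of num_ents - num_ents/4, i.e. 3/4 of the TSB's
 * capacity.  An 8K TSB holds 8192 / 16 = 512 entries, so its limit is 384
 * resident pages.
 */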

/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
 * do_sparc64_fault() invokes this routine to try and grow it.
 *
 * When we reach the maximum TSB size supported, we stick ~0UL into
 * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
 * will not trigger any longer.
 *
 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 * of two.  The TSB must be aligned to its size, so e.g. a 512K TSB
 * must be 512K aligned.  It also must be physically contiguous, so we
 * cannot use vmalloc().
 *
 * The idea here is to grow the TSB when the RSS of the process approaches
 * the number of entries that the current TSB can hold at once.  Currently,
 * we trigger when the RSS hits 3/4 of the TSB capacity.
 */
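/* Illustrative growth sequence under the default 3/4 ratio: starting from
 * the minimal 8K table, a fault that finds RSS above the limit picks the
 * smallest size whose limit still exceeds the current RSS (usually the next
 * power of two: 16K holds 1024 entries with a limit of 768, then 32K, and
 * so on), up to 1MB (65536 entries), where tsb_rss_limit is pinned to ~0UL
 * so the grow check never fires again.
 */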
void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
{
        unsigned long max_tsb_size = 1 * 1024 * 1024;
        unsigned long new_size, old_size, flags;
        struct tsb *old_tsb, *new_tsb;
        unsigned long new_cache_index, old_cache_index;
        unsigned long new_rss_limit;
        gfp_t gfp_flags;

        if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
                max_tsb_size = (PAGE_SIZE << MAX_ORDER);

        new_cache_index = 0;
        for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
                new_rss_limit = tsb_size_to_rss_limit(new_size);
                if (new_rss_limit > rss)
                        break;
                new_cache_index++;
        }

        if (new_size == max_tsb_size)
                new_rss_limit = ~0UL;

retry_tsb_alloc:
        gfp_flags = GFP_KERNEL;
        if (new_size > (PAGE_SIZE * 2))
                gfp_flags = __GFP_NOWARN | __GFP_NORETRY;

        new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
                                        gfp_flags, numa_node_id());
        if (unlikely(!new_tsb)) {
                /* Not being able to fork due to a high-order TSB
                 * allocation failure is very bad behavior.  Just back
                 * down to a 0-order allocation and force no TSB
                 * growing for this address space.
                 */
                if (mm->context.tsb_block[tsb_index].tsb == NULL &&
                    new_cache_index > 0) {
                        new_cache_index = 0;
                        new_size = 8192;
                        new_rss_limit = ~0UL;
                        goto retry_tsb_alloc;
                }

                /* If we failed on a TSB grow, we are under serious
                 * memory pressure so don't try to grow any more.
                 */
                if (mm->context.tsb_block[tsb_index].tsb != NULL)
                        mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
                return;
        }

        /* Mark all tags as invalid. */
        tsb_init(new_tsb, new_size);

        /* Ok, we are about to commit the changes.  If we are
         * growing an existing TSB the locking is very tricky,
         * so WATCH OUT!
         *
         * We have to hold mm->context.lock while committing to the
         * new TSB, this synchronizes us with processors in
         * flush_tsb_user() and switch_mm() for this address space.
         *
         * But even with that lock held, processors run asynchronously
         * accessing the old TSB via TLB miss handling.  This is OK
         * because those actions are just propagating state from the
         * Linux page tables into the TSB, page table mappings are not
         * being changed.  If a real fault occurs, the processor will
         * synchronize with us when it hits flush_tsb_user(), this is
         * also true for the case where vmscan is modifying the page
         * tables.  The only thing we need to be careful with is to
         * skip any locked TSB entries during copy_tsb().
         *
         * When we finish committing to the new TSB, we have to drop
         * the lock and ask all other cpus running this address space
         * to run tsb_context_switch() to see the new TSB table.
         */
        spin_lock_irqsave(&mm->context.lock, flags);

        old_tsb = mm->context.tsb_block[tsb_index].tsb;
        old_cache_index =
                (mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
        old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
                    sizeof(struct tsb));

        /* Handle multiple threads trying to grow the TSB at the same time.
         * One will get in here first, and bump the size and the RSS limit.
         * The others will get in here next and hit this check.
         */
        if (unlikely(old_tsb &&
                     (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
                spin_unlock_irqrestore(&mm->context.lock, flags);

                kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
                return;
        }

        mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;

        if (old_tsb) {
                extern void copy_tsb(unsigned long old_tsb_base,
                                     unsigned long old_tsb_size,
                                     unsigned long new_tsb_base,
                                     unsigned long new_tsb_size);
                unsigned long old_tsb_base = (unsigned long) old_tsb;
                unsigned long new_tsb_base = (unsigned long) new_tsb;

                if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
                        old_tsb_base = __pa(old_tsb_base);
                        new_tsb_base = __pa(new_tsb_base);
                }
                copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
        }

        mm->context.tsb_block[tsb_index].tsb = new_tsb;
        setup_tsb_params(mm, tsb_index, new_size);

        spin_unlock_irqrestore(&mm->context.lock, flags);

        /* If old_tsb is NULL, we're being invoked for the first time
         * from init_new_context().
         */
        if (old_tsb) {
                /* Reload it on the local cpu. */
                tsb_context_switch(mm);

                /* Now force other processors to do the same. */
                preempt_disable();
                smp_tsb_sync(mm);
                preempt_enable();

                /* Now it is safe to free the old tsb. */
                kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
        }
}

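/* Called when a new address space is created (fork/exec).  The mm_struct
 * was copied from the parent, so the inherited TSB pointers are cleared
 * before tsb_grow() allocates a fresh base TSB sized for the parent's RSS,
 * plus a huge-page TSB if the parent had huge-page mappings.
 */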
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        unsigned long huge_pte_count;
#endif
        unsigned int i;

        spin_lock_init(&mm->context.lock);

        mm->context.sparc64_ctx_val = 0UL;

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        /* We reset it to zero because the fork() page copying
         * will re-increment the counters as the parent PTEs are
         * copied into the child address space.
         */
        huge_pte_count = mm->context.huge_pte_count;
        mm->context.huge_pte_count = 0;
#endif

        mm->context.pgtable_page = NULL;

        /* copy_mm() copies over the parent's mm_struct before calling
         * us, so we need to zero out the TSB pointer or else tsb_grow()
         * will be confused and think there is an older TSB to free up.
         */
        for (i = 0; i < MM_NUM_TSBS; i++)
                mm->context.tsb_block[i].tsb = NULL;

        /* If this is fork, inherit the parent's TSB size.  We would
         * grow it to that size on the first page fault anyways.
         */
        tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        if (unlikely(huge_pte_count))
                tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
#endif

        if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
                return -ENOMEM;

        return 0;
}

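/* Free one TSB back to its size-indexed cache.  The cache index is
 * recovered from the low three bits of tsb_reg_val, which
 * setup_tsb_params() set to 0x0..0x7 for the 8K..1MB sizes.
 */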
static void tsb_destroy_one(struct tsb_config *tp)
{
        unsigned long cache_index;

        if (!tp->tsb)
                return;
        cache_index = tp->tsb_reg_val & 0x7UL;
        kmem_cache_free(tsb_caches[cache_index], tp->tsb);
        tp->tsb = NULL;
        tp->tsb_reg_val = 0UL;
}

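/* Tear down the MMU state of an exiting address space: free both TSBs,
 * drop the cached page-table page, and release the context number back to
 * mmu_context_bmap under ctx_alloc_lock.
 */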
void destroy_context(struct mm_struct *mm)
{
        unsigned long flags, i;
        struct page *page;

        for (i = 0; i < MM_NUM_TSBS; i++)
                tsb_destroy_one(&mm->context.tsb_block[i]);

        page = mm->context.pgtable_page;
        if (page && put_page_testzero(page)) {
                pgtable_page_dtor(page);
                free_hot_cold_page(page, 0);
        }

        spin_lock_irqsave(&ctx_alloc_lock, flags);

        if (CTX_VALID(mm->context)) {
                unsigned long nr = CTX_NRBITS(mm->context);
                mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
        }

        spin_unlock_irqrestore(&ctx_alloc_lock, flags);
}