Linux Kernel  3.7.1
hash_native_64.c
/*
 * native hashtable management.
 *
 * SMP scalability work:
 * Copyright (C) 2001 Anton Blanchard <[email protected]>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#define HPTE_LOCK_BIT 3

DEFINE_RAW_SPINLOCK(native_tlbie_lock);

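/*
 * Build the operand for a global tlbie from the virtual page number,
 * page size and segment size, then issue the instruction (the newer
 * PPC_TLBIE form when CPU_FTR_ARCH_206 is set, the old form otherwise).
 * Callers provide the surrounding ptesync/tlbsync and serialize with
 * native_tlbie_lock where the hardware requires it.
 */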
static inline void __tlbie(unsigned long vpn, int psize, int ssize)
{
        unsigned long va;
        unsigned int penc;

        /*
         * We need 14 to 65 bits of va for a tlbie of 4K page
         * With vpn we ignore the lower VPN_SHIFT bits already.
         * And top two bits are already ignored because we can
         * only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
         * of 12.
         */
        va = vpn << VPN_SHIFT;
        /*
         * clear top 16 bits of 64bit va, non SLS segment
         * Older versions of the architecture (2.02 and earlier) require the
         * masking of the top 16 bits.
         */
        va &= ~(0xffffULL << 48);

        switch (psize) {
        case MMU_PAGE_4K:
                va |= ssize << 8;
                asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
                             : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
                             : "memory");
                break;
        default:
                /* We need 14 to 14 + i bits of va */
                penc = mmu_psize_defs[psize].penc;
                va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
                va |= penc << 12;
                va |= ssize << 8;
                va |= 1; /* L */
                asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
                             : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
                             : "memory");
                break;
        }
}

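/*
 * Local variant of __tlbie(): encodes the same operand but issues the
 * tlbiel opcode (emitted as a raw .long), so only the current cpu's
 * TLB is affected.
 */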
static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
{
        unsigned long va;
        unsigned int penc;

        /* VPN_SHIFT can be at most 12 */
        va = vpn << VPN_SHIFT;
        /*
         * clear top 16 bits of 64 bit va, non SLS segment
         * Older versions of the architecture (2.02 and earlier) require the
         * masking of the top 16 bits.
         */
        va &= ~(0xffffULL << 48);

        switch (psize) {
        case MMU_PAGE_4K:
                va |= ssize << 8;
                asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
                             : : "r"(va) : "memory");
                break;
        default:
                /* We need 14 to 14 + i bits of va */
                penc = mmu_psize_defs[psize].penc;
                va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
                va |= penc << 12;
                va |= ssize << 8;
                va |= 1; /* L */
                asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
                             : : "r"(va) : "memory");
                break;
        }

}

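/*
 * Invalidate one TLB entry, using tlbiel when the caller asked for a
 * local flush and both the MMU feature and the page size support it,
 * otherwise falling back to a global tlbie.  Supplies the required
 * ptesync / eieio; tlbsync; ptesync sequences and takes
 * native_tlbie_lock when the hardware cannot handle concurrent tlbie
 * from multiple cpus.
 */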
static inline void tlbie(unsigned long vpn, int psize, int ssize, int local)
{
        unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
        int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

        if (use_local)
                use_local = mmu_psize_defs[psize].tlbiel;
        if (lock_tlbie && !use_local)
                raw_spin_lock(&native_tlbie_lock);
        asm volatile("ptesync": : :"memory");
        if (use_local) {
                __tlbiel(vpn, psize, ssize);
                asm volatile("ptesync": : :"memory");
        } else {
                __tlbie(vpn, psize, ssize);
                asm volatile("eieio; tlbsync; ptesync": : :"memory");
        }
        if (lock_tlbie && !use_local)
                raw_spin_unlock(&native_tlbie_lock);
}

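/*
 * Per-HPTE lock implemented with a software bit (HPTE_LOCK_BIT) in the
 * first doubleword of the entry: spin with cpu_relax() until the bit
 * can be acquired.  native_unlock_hpte() below drops it again.
 */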
static inline void native_lock_hpte(struct hash_pte *hptep)
{
        unsigned long *word = &hptep->v;

        while (1) {
                if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
                        break;
                while(test_bit(HPTE_LOCK_BIT, word))
                        cpu_relax();
        }
}

static inline void native_unlock_hpte(struct hash_pte *hptep)
{
        unsigned long *word = &hptep->v;

        clear_bit_unlock(HPTE_LOCK_BIT, word);
}

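/*
 * Try to insert a new HPTE into the given hash group: find a free slot,
 * lock it, write the second doubleword first and then the first one
 * (which sets HPTE_V_VALID and implicitly drops the lock).  Returns the
 * slot index within the group, with bit 3 set for a secondary-group
 * insert, or -1 if the group is full.
 */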
static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
                        unsigned long pa, unsigned long rflags,
                        unsigned long vflags, int psize, int ssize)
{
        struct hash_pte *hptep = htab_address + hpte_group;
        unsigned long hpte_v, hpte_r;
        int i;

        if (!(vflags & HPTE_V_BOLTED)) {
                DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx,"
                        " rflags=%lx, vflags=%lx, psize=%d)\n",
                        hpte_group, vpn, pa, rflags, vflags, psize);
        }

        for (i = 0; i < HPTES_PER_GROUP; i++) {
                if (! (hptep->v & HPTE_V_VALID)) {
                        /* retry with lock held */
                        native_lock_hpte(hptep);
                        if (! (hptep->v & HPTE_V_VALID))
                                break;
                        native_unlock_hpte(hptep);
                }

                hptep++;
        }

        if (i == HPTES_PER_GROUP)
                return -1;

        hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
        hpte_r = hpte_encode_r(pa, psize) | rflags;

        if (!(vflags & HPTE_V_BOLTED)) {
                DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
                        i, hpte_v, hpte_r);
        }

        hptep->r = hpte_r;
        /* Guarantee the second dword is visible before the valid bit */
        eieio();
        /*
         * Now set the first dword including the valid bit
         * NOTE: this also unlocks the hpte
         */
        hptep->v = hpte_v;

        __asm__ __volatile__ ("ptesync" : : : "memory");

        return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

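/*
 * Evict an entry from a full hash group so that a new one can be
 * inserted: starting at a pseudo-random slot (low bits of the timebase),
 * pick the first valid, non-bolted HPTE, clear it and return its slot
 * index within the group, or -1 if no victim could be found.
 */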
static long native_hpte_remove(unsigned long hpte_group)
{
        struct hash_pte *hptep;
        int i;
        int slot_offset;
        unsigned long hpte_v;

        DBG_LOW(" remove(group=%lx)\n", hpte_group);

        /* pick a random entry to start at */
        slot_offset = mftb() & 0x7;

        for (i = 0; i < HPTES_PER_GROUP; i++) {
                hptep = htab_address + hpte_group + slot_offset;
                hpte_v = hptep->v;

                if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
                        /* retry with lock held */
                        native_lock_hpte(hptep);
                        hpte_v = hptep->v;
                        if ((hpte_v & HPTE_V_VALID)
                            && !(hpte_v & HPTE_V_BOLTED))
                                break;
                        native_unlock_hpte(hptep);
                }

                slot_offset++;
                slot_offset &= 0x7;
        }

        if (i == HPTES_PER_GROUP)
                return -1;

        /* Invalidate the hpte. NOTE: this also unlocks it */
        hptep->v = 0;

        return i;
}

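/*
 * Update the protection (PP) and no-execute (N) bits of the HPTE at
 * @slot if it still maps @vpn.  Returns 0 on success or -1 if the entry
 * no longer matches; the TLB entry is invalidated in either case.
 */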
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
                                 unsigned long vpn, int psize, int ssize,
                                 int local)
{
        struct hash_pte *hptep = htab_address + slot;
        unsigned long hpte_v, want_v;
        int ret = 0;

        want_v = hpte_encode_v(vpn, psize, ssize);

        DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
                vpn, want_v & HPTE_V_AVPN, slot, newpp);

        native_lock_hpte(hptep);

        hpte_v = hptep->v;

        /* Even if we miss, we need to invalidate the TLB */
        if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
                DBG_LOW(" -> miss\n");
                ret = -1;
        } else {
                DBG_LOW(" -> hit\n");
                /* Update the HPTE */
                hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
                        (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
        }
        native_unlock_hpte(hptep);

        /* Ensure it is out of the tlb too. */
        tlbie(vpn, psize, ssize, local);

        return ret;
}

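/*
 * Look up the HPTE that maps @vpn in the primary hash group (bolted
 * mappings are only ever inserted there).  Returns the global slot
 * number, or -1 if no matching valid entry exists.
 */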
static long native_hpte_find(unsigned long vpn, int psize, int ssize)
{
        struct hash_pte *hptep;
        unsigned long hash;
        unsigned long i;
        long slot;
        unsigned long want_v, hpte_v;

        hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
        want_v = hpte_encode_v(vpn, psize, ssize);

        /* Bolted mappings are only ever in the primary group */
        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
        for (i = 0; i < HPTES_PER_GROUP; i++) {
                hptep = htab_address + slot;
                hpte_v = hptep->v;

                if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
                        /* HPTE matches */
                        return slot;
                ++slot;
        }

        return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
                                       int psize, int ssize)
{
        unsigned long vpn;
        unsigned long vsid;
        long slot;
        struct hash_pte *hptep;

        vsid = get_kernel_vsid(ea, ssize);
        vpn = hpt_vpn(ea, vsid, ssize);

        slot = native_hpte_find(vpn, psize, ssize);
        if (slot == -1)
                panic("could not find page to bolt\n");
        hptep = htab_address + slot;

        /* Update the HPTE */
        hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
                (newpp & (HPTE_R_PP | HPTE_R_N));

        /* Ensure it is out of the tlb too. */
        tlbie(vpn, psize, ssize, 0);
}

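/*
 * Invalidate the HPTE at @slot if it still maps @vpn, then flush the
 * corresponding TLB entry.  Runs with interrupts disabled.
 */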
static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
                                   int psize, int ssize, int local)
{
        struct hash_pte *hptep = htab_address + slot;
        unsigned long hpte_v;
        unsigned long want_v;
        unsigned long flags;

        local_irq_save(flags);

        DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);

        want_v = hpte_encode_v(vpn, psize, ssize);
        native_lock_hpte(hptep);
        hpte_v = hptep->v;

        /* Even if we miss, we need to invalidate the TLB */
        if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
                native_unlock_hpte(hptep);
        else
                /* Invalidate the hpte. NOTE: this also unlocks it */
                hptep->v = 0;

        /* Invalidate the TLB */
        tlbie(vpn, psize, ssize, local);

        local_irq_restore(flags);
}

#define LP_SHIFT 12
#define LP_BITS 8
#define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT)

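/*
 * Recover the page size, segment size and virtual page number encoded
 * in a raw HPTE.  The low VPN bits that are not part of the AVPN field
 * are reconstructed from the slot's PTE-group index.  Used by
 * native_hpte_clear() when tearing down the hash table for kexec.
 */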
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
                        int *psize, int *ssize, unsigned long *vpn)
{
        unsigned long avpn, pteg, vpi;
        unsigned long hpte_r = hpte->r;
        unsigned long hpte_v = hpte->v;
        unsigned long vsid, seg_off;
        int i, size, shift, penc;

        if (!(hpte_v & HPTE_V_LARGE))
                size = MMU_PAGE_4K;
        else {
                for (i = 0; i < LP_BITS; i++) {
                        if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1))
                                break;
                }
                penc = LP_MASK(i+1) >> LP_SHIFT;
                for (size = 0; size < MMU_PAGE_COUNT; size++) {

                        /* 4K pages are not represented by LP */
                        if (size == MMU_PAGE_4K)
                                continue;

                        /* valid entries have a shift value */
                        if (!mmu_psize_defs[size].shift)
                                continue;

                        if (penc == mmu_psize_defs[size].penc)
                                break;
                }
        }

        /* This works for all page sizes, and for 256M and 1T segments */
        *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
        shift = mmu_psize_defs[size].shift;

        avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
        pteg = slot / HPTES_PER_GROUP;
        if (hpte_v & HPTE_V_SECONDARY)
                pteg = ~pteg;

        switch (*ssize) {
        case MMU_SEGSIZE_256M:
                /* We only have 28 - 23 bits of seg_off in avpn */
                seg_off = (avpn & 0x1f) << 23;
                vsid = avpn >> 5;
                /* We can find more bits from the pteg value */
                if (shift < 23) {
                        vpi = (vsid ^ pteg) & htab_hash_mask;
                        seg_off |= vpi << shift;
                }
                *vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
                break;
        case MMU_SEGSIZE_1T:
                /* We only have 40 - 23 bits of seg_off in avpn */
                seg_off = (avpn & 0x1ffff) << 23;
                vsid = avpn >> 17;
                if (shift < 23) {
                        vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
                        seg_off |= vpi << shift;
                }
                *vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
                break;
        default:
                *vpn = size = 0;
        }
        *psize = size;
}

/*
 * clear all mappings on kexec. All cpus are in real mode (or they will
 * be when they isi), and we are the only one left. We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * TODO: add batching support when enabled. remember, no dynamic memory here,
 * although there is the control page available...
 */
static void native_hpte_clear(void)
{
        unsigned long vpn = 0;
        unsigned long slot, slots, flags;
        struct hash_pte *hptep = htab_address;
        unsigned long hpte_v;
        unsigned long pteg_count;
        int psize, ssize;

        pteg_count = htab_hash_mask + 1;

        local_irq_save(flags);

        /* we take the tlbie lock and hold it. Some hardware will
         * deadlock if we try to tlbie from two processors at once.
         */
        raw_spin_lock(&native_tlbie_lock);

        slots = pteg_count * HPTES_PER_GROUP;

        for (slot = 0; slot < slots; slot++, hptep++) {
                /*
                 * we could lock the pte here, but we are the only cpu
                 * running, right? and for crash dump, we probably
                 * don't want to wait for a maybe bad cpu.
                 */
                hpte_v = hptep->v;

                /*
                 * Call __tlbie() here rather than tlbie() since we
                 * already hold the native_tlbie_lock.
                 */
                if (hpte_v & HPTE_V_VALID) {
                        hpte_decode(hptep, slot, &psize, &ssize, &vpn);
                        hptep->v = 0;
                        __tlbie(vpn, psize, ssize);
                }
        }

        asm volatile("eieio; tlbsync; ptesync":::"memory");
        raw_spin_unlock(&native_tlbie_lock);
        local_irq_restore(flags);
}

/*
 * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
 * the lock all the time
 */
static void native_flush_hash_range(unsigned long number, int local)
{
        unsigned long vpn;
        unsigned long hash, index, hidx, shift, slot;
        struct hash_pte *hptep;
        unsigned long hpte_v;
        unsigned long want_v;
        unsigned long flags;
        real_pte_t pte;
        struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
        unsigned long psize = batch->psize;
        int ssize = batch->ssize;
        int i;

        local_irq_save(flags);

        for (i = 0; i < number; i++) {
                vpn = batch->vpn[i];
                pte = batch->pte[i];

                pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
                        hash = hpt_hash(vpn, shift, ssize);
                        hidx = __rpte_to_hidx(pte, index);
                        if (hidx & _PTEIDX_SECONDARY)
                                hash = ~hash;
                        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                        slot += hidx & _PTEIDX_GROUP_IX;
                        hptep = htab_address + slot;
                        want_v = hpte_encode_v(vpn, psize, ssize);
                        native_lock_hpte(hptep);
                        hpte_v = hptep->v;
                        if (!HPTE_V_COMPARE(hpte_v, want_v) ||
                            !(hpte_v & HPTE_V_VALID))
                                native_unlock_hpte(hptep);
                        else
                                hptep->v = 0;
                } pte_iterate_hashed_end();
        }

        if (mmu_has_feature(MMU_FTR_TLBIEL) &&
            mmu_psize_defs[psize].tlbiel && local) {
                asm volatile("ptesync":::"memory");
                for (i = 0; i < number; i++) {
                        vpn = batch->vpn[i];
                        pte = batch->pte[i];

                        pte_iterate_hashed_subpages(pte, psize,
                                                    vpn, index, shift) {
                                __tlbiel(vpn, psize, ssize);
                        } pte_iterate_hashed_end();
                }
                asm volatile("ptesync":::"memory");
        } else {
                int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

                if (lock_tlbie)
                        raw_spin_lock(&native_tlbie_lock);

                asm volatile("ptesync":::"memory");
                for (i = 0; i < number; i++) {
                        vpn = batch->vpn[i];
                        pte = batch->pte[i];

                        pte_iterate_hashed_subpages(pte, psize,
                                                    vpn, index, shift) {
                                __tlbie(vpn, psize, ssize);
                        } pte_iterate_hashed_end();
                }
                asm volatile("eieio; tlbsync; ptesync":::"memory");

                if (lock_tlbie)
                        raw_spin_unlock(&native_tlbie_lock);
        }

        local_irq_restore(flags);
}

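/*
 * Wire the native hash-table operations into the machine descriptor
 * (ppc_md) so the generic hash MMU code uses the bare-metal versions
 * above.
 */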
void __init hpte_init_native(void)
{
        ppc_md.hpte_invalidate = native_hpte_invalidate;
        ppc_md.hpte_updatepp = native_hpte_updatepp;
        ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
        ppc_md.hpte_insert = native_hpte_insert;
        ppc_md.hpte_remove = native_hpte_remove;
        ppc_md.hpte_clear_all = native_hpte_clear;
        ppc_md.flush_hash_range = native_flush_hash_range;
}