Linux Kernel  3.7.1
iommu.c
1 /*
2  * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
3  *
4  * Rewrite, cleanup, new allocation schemes, virtual merging:
5  * Copyright (C) 2004 Olof Johansson, IBM Corporation
6  * and Ben. Herrenschmidt, IBM Corporation
7  *
8  * Dynamic DMA mapping support, bus-independent parts.
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 
25 
26 #include <linux/init.h>
27 #include <linux/types.h>
28 #include <linux/slab.h>
29 #include <linux/mm.h>
30 #include <linux/spinlock.h>
31 #include <linux/string.h>
32 #include <linux/dma-mapping.h>
33 #include <linux/bitmap.h>
34 #include <linux/iommu-helper.h>
35 #include <linux/crash_dump.h>
36 #include <linux/hash.h>
37 #include <linux/fault-inject.h>
38 #include <linux/pci.h>
39 #include <asm/io.h>
40 #include <asm/prom.h>
41 #include <asm/iommu.h>
42 #include <asm/pci-bridge.h>
43 #include <asm/machdep.h>
44 #include <asm/kdump.h>
45 #include <asm/fadump.h>
46 #include <asm/vio.h>
47 
48 #define DBG(...)
49 
50 static int novmerge;
51 
52 static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);
53 
54 static int __init setup_iommu(char *str)
55 {
56  if (!strcmp(str, "novmerge"))
57  novmerge = 1;
58  else if (!strcmp(str, "vmerge"))
59  novmerge = 0;
60  return 1;
61 }
62 
63 __setup("iommu=", setup_iommu);
64 
65 static DEFINE_PER_CPU(unsigned int, iommu_pool_hash);
66 
67 /*
68  * We precalculate the hash to avoid doing it on every allocation.
69  *
70  * The hash is important to spread CPUs across all the pools. For example,
71  * on a POWER7 with 4-way SMT, interrupts are taken on the primary threads;
72  * with 4 pools, a simple modulo would map all primary threads to the same pool.
73  */
74 static int __init setup_iommu_pool_hash(void)
75 {
76  unsigned int i;
77 
78  for_each_possible_cpu(i)
79  per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
80 
81  return 0;
82 }
83 subsys_initcall(setup_iommu_pool_hash);
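The comment above can be made concrete with a minimal sketch (illustrative only, not part of iommu.c; the 4-way SMT layout is an assumption): interrupts arrive on primary threads (CPUs 0, 4, 8, ...), so a plain "cpu & (nr_pools - 1)" would put every primary thread in pool 0, whereas hash_32() scatters them.

#include <linux/hash.h>
#include <asm/iommu.h>		/* IOMMU_POOL_HASHBITS */

/* Hypothetical helper mirroring the pool selection done in iommu_range_alloc(). */
static unsigned int example_pool_for_cpu(unsigned int cpu, unsigned int nr_pools)
{
	/* nr_pools is a power of two, as set up by iommu_init_table() */
	return hash_32(cpu, IOMMU_POOL_HASHBITS) & (nr_pools - 1);
}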
84 
85 #ifdef CONFIG_FAIL_IOMMU
86 
87 static DECLARE_FAULT_ATTR(fail_iommu);
88 
89 static int __init setup_fail_iommu(char *str)
90 {
91  return setup_fault_attr(&fail_iommu, str);
92 }
93 __setup("fail_iommu=", setup_fail_iommu);
94 
95 static bool should_fail_iommu(struct device *dev)
96 {
97  return dev->archdata.fail_iommu && should_fail(&fail_iommu, 1);
98 }
99 
100 static int __init fail_iommu_debugfs(void)
101 {
102  struct dentry *dir = fault_create_debugfs_attr("fail_iommu",
103  NULL, &fail_iommu);
104 
105  return IS_ERR(dir) ? PTR_ERR(dir) : 0;
106 }
107 late_initcall(fail_iommu_debugfs);
108 
109 static ssize_t fail_iommu_show(struct device *dev,
110  struct device_attribute *attr, char *buf)
111 {
112  return sprintf(buf, "%d\n", dev->archdata.fail_iommu);
113 }
114 
115 static ssize_t fail_iommu_store(struct device *dev,
116  struct device_attribute *attr, const char *buf,
117  size_t count)
118 {
119  int i;
120 
121  if (count > 0 && sscanf(buf, "%d", &i) > 0)
122  dev->archdata.fail_iommu = (i == 0) ? 0 : 1;
123 
124  return count;
125 }
126 
127 static DEVICE_ATTR(fail_iommu, S_IRUGO|S_IWUSR, fail_iommu_show,
128  fail_iommu_store);
129 
130 static int fail_iommu_bus_notify(struct notifier_block *nb,
131  unsigned long action, void *data)
132 {
133  struct device *dev = data;
134 
135  if (action == BUS_NOTIFY_ADD_DEVICE) {
136  if (device_create_file(dev, &dev_attr_fail_iommu))
137  pr_warn("Unable to create IOMMU fault injection sysfs "
138  "entries\n");
139  } else if (action == BUS_NOTIFY_DEL_DEVICE) {
140  device_remove_file(dev, &dev_attr_fail_iommu);
141  }
142 
143  return 0;
144 }
145 
146 static struct notifier_block fail_iommu_bus_notifier = {
147  .notifier_call = fail_iommu_bus_notify
148 };
149 
150 static int __init fail_iommu_setup(void)
151 {
152 #ifdef CONFIG_PCI
153  bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier);
154 #endif
155 #ifdef CONFIG_IBMVIO
156  bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier);
157 #endif
158 
159  return 0;
160 }
161 /*
162  * Must execute after the PCI and VIO subsystems have initialised but
163  * devices are probed.
164  */
165 arch_initcall(fail_iommu_setup);
166 #else
167 static inline bool should_fail_iommu(struct device *dev)
168 {
169  return false;
170 }
171 #endif
172 
173 static unsigned long iommu_range_alloc(struct device *dev,
174  struct iommu_table *tbl,
175  unsigned long npages,
176  unsigned long *handle,
177  unsigned long mask,
178  unsigned int align_order)
179 {
180  unsigned long n, end, start;
181  unsigned long limit;
182  int largealloc = npages > 15;
183  int pass = 0;
184  unsigned long align_mask;
185  unsigned long boundary_size;
186  unsigned long flags;
187  unsigned int pool_nr;
188  struct iommu_pool *pool;
189 
190  align_mask = 0xffffffffffffffffl >> (64 - align_order);
191 
192  /* This allocator was derived from x86_64's bit string search */
193 
194  /* Sanity check */
195  if (unlikely(npages == 0)) {
196  if (printk_ratelimit())
197  WARN_ON(1);
198  return DMA_ERROR_CODE;
199  }
200 
201  if (should_fail_iommu(dev))
202  return DMA_ERROR_CODE;
203 
204  /*
205  * We don't need to disable preemption here because any CPU can
206  * safely use any IOMMU pool.
207  */
208  pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1);
209 
210  if (largealloc)
211  pool = &(tbl->large_pool);
212  else
213  pool = &(tbl->pools[pool_nr]);
214 
215  spin_lock_irqsave(&(pool->lock), flags);
216 
217 again:
218  if ((pass == 0) && handle && *handle &&
219  (*handle >= pool->start) && (*handle < pool->end))
220  start = *handle;
221  else
222  start = pool->hint;
223 
224  limit = pool->end;
225 
226  /* The case below can happen if we have a small segment appended
227  * to a large one, or when the previous alloc was at the very end of
228  * the available space. If so, go back to the initial start.
229  */
230  if (start >= limit)
231  start = pool->start;
232 
233  if (limit + tbl->it_offset > mask) {
234  limit = mask - tbl->it_offset + 1;
235  /* If we're constrained on address range, first try
236  * at the masked hint to avoid O(n) search complexity,
237  * but on second pass, start at 0 in pool 0.
238  */
239  if ((start & mask) >= limit || pass > 0) {
240  spin_unlock(&(pool->lock));
241  pool = &(tbl->pools[0]);
242  spin_lock(&(pool->lock));
243  start = pool->start;
244  } else {
245  start &= mask;
246  }
247  }
248 
249  if (dev)
250  boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
251  1 << IOMMU_PAGE_SHIFT);
252  else
253  boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT);
254  /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */
255 
256  n = iommu_area_alloc(tbl->it_map, limit, start, npages,
257  tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT,
258  align_mask);
259  if (n == -1) {
260  if (likely(pass == 0)) {
261  /* First try the pool from the start */
262  pool->hint = pool->start;
263  pass++;
264  goto again;
265 
266  } else if (pass <= tbl->nr_pools) {
267  /* Now try scanning all the other pools */
268  spin_unlock(&(pool->lock));
269  pool_nr = (pool_nr + 1) & (tbl->nr_pools - 1);
270  pool = &tbl->pools[pool_nr];
271  spin_lock(&(pool->lock));
272  pool->hint = pool->start;
273  pass++;
274  goto again;
275 
276  } else {
277  /* Give up */
278  spin_unlock_irqrestore(&(pool->lock), flags);
279  return DMA_ERROR_CODE;
280  }
281  }
282 
283  end = n + npages;
284 
285  /* Bump the hint to a new block for small allocs. */
286  if (largealloc) {
287  /* Don't bump to new block to avoid fragmentation */
288  pool->hint = end;
289  } else {
290  /* Overflow will be taken care of at the next allocation */
291  pool->hint = (end + tbl->it_blocksize - 1) &
292  ~(tbl->it_blocksize - 1);
293  }
294 
295  /* Update handle for SG allocations */
296  if (handle)
297  *handle = end;
298 
299  spin_unlock_irqrestore(&(pool->lock), flags);
300 
301  return n;
302 }
303 
304 static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
305  void *page, unsigned int npages,
306  enum dma_data_direction direction,
307  unsigned long mask, unsigned int align_order,
308  struct dma_attrs *attrs)
309 {
310  unsigned long entry;
311  dma_addr_t ret = DMA_ERROR_CODE;
312  int build_fail;
313 
314  entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
315 
316  if (unlikely(entry == DMA_ERROR_CODE))
317  return DMA_ERROR_CODE;
318 
319  entry += tbl->it_offset; /* Offset into real TCE table */
320  ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */
321 
322  /* Put the TCEs in the HW table */
323  build_fail = ppc_md.tce_build(tbl, entry, npages,
324  (unsigned long)page & IOMMU_PAGE_MASK,
325  direction, attrs);
326 
327  /* ppc_md.tce_build() only returns non-zero for transient errors.
328  * Clean up the table bitmap in this case and return
329  * DMA_ERROR_CODE. For all other errors the functionality is
330  * not altered.
331  */
332  if (unlikely(build_fail)) {
333  __iommu_free(tbl, ret, npages);
334  return DMA_ERROR_CODE;
335  }
336 
337  /* Flush/invalidate TLB caches if necessary */
338  if (ppc_md.tce_flush)
339  ppc_md.tce_flush(tbl);
340 
341  /* Make sure updates are seen by hardware */
342  mb();
343 
344  return ret;
345 }
346 
347 static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,
348  unsigned int npages)
349 {
350  unsigned long entry, free_entry;
351 
352  entry = dma_addr >> IOMMU_PAGE_SHIFT;
353  free_entry = entry - tbl->it_offset;
354 
355  if (((free_entry + npages) > tbl->it_size) ||
356  (entry < tbl->it_offset)) {
357  if (printk_ratelimit()) {
358  printk(KERN_INFO "iommu_free: invalid entry\n");
359  printk(KERN_INFO "\tentry = 0x%lx\n", entry);
360  printk(KERN_INFO "\tdma_addr = 0x%llx\n", (u64)dma_addr);
361  printk(KERN_INFO "\tTable = 0x%llx\n", (u64)tbl);
362  printk(KERN_INFO "\tbus# = 0x%llx\n", (u64)tbl->it_busno);
363  printk(KERN_INFO "\tsize = 0x%llx\n", (u64)tbl->it_size);
364  printk(KERN_INFO "\tstartOff = 0x%llx\n", (u64)tbl->it_offset);
365  printk(KERN_INFO "\tindex = 0x%llx\n", (u64)tbl->it_index);
366  WARN_ON(1);
367  }
368 
369  return false;
370  }
371 
372  return true;
373 }
374 
375 static struct iommu_pool *get_pool(struct iommu_table *tbl,
376  unsigned long entry)
377 {
378  struct iommu_pool *p;
379  unsigned long largepool_start = tbl->large_pool.start;
380 
381  /* The large pool is the last pool at the top of the table */
382  if (entry >= largepool_start) {
383  p = &tbl->large_pool;
384  } else {
385  unsigned int pool_nr = entry / tbl->poolsize;
386 
387  BUG_ON(pool_nr > tbl->nr_pools);
388  p = &tbl->pools[pool_nr];
389  }
390 
391  return p;
392 }
393 
394 static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
395  unsigned int npages)
396 {
397  unsigned long entry, free_entry;
398  unsigned long flags;
399  struct iommu_pool *pool;
400 
401  entry = dma_addr >> IOMMU_PAGE_SHIFT;
402  free_entry = entry - tbl->it_offset;
403 
404  pool = get_pool(tbl, free_entry);
405 
406  if (!iommu_free_check(tbl, dma_addr, npages))
407  return;
408 
409  ppc_md.tce_free(tbl, entry, npages);
410 
411  spin_lock_irqsave(&(pool->lock), flags);
412  bitmap_clear(tbl->it_map, free_entry, npages);
413  spin_unlock_irqrestore(&(pool->lock), flags);
414 }
415 
416 static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
417  unsigned int npages)
418 {
419  __iommu_free(tbl, dma_addr, npages);
420 
421  /* Make sure TLB cache is flushed if the HW needs it. We do
422  * not do an mb() here on purpose, it is not needed on any of
423  * the current platforms.
424  */
425  if (ppc_md.tce_flush)
426  ppc_md.tce_flush(tbl);
427 }
428 
429 int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
430  struct scatterlist *sglist, int nelems,
431  unsigned long mask, enum dma_data_direction direction,
432  struct dma_attrs *attrs)
433 {
434  dma_addr_t dma_next = 0, dma_addr;
435  struct scatterlist *s, *outs, *segstart;
436  int outcount, incount, i, build_fail = 0;
437  unsigned int align;
438  unsigned long handle;
439  unsigned int max_seg_size;
440 
441  BUG_ON(direction == DMA_NONE);
442 
443  if ((nelems == 0) || !tbl)
444  return 0;
445 
446  outs = s = segstart = &sglist[0];
447  outcount = 1;
448  incount = nelems;
449  handle = 0;
450 
451  /* Init first segment length for backout at failure */
452  outs->dma_length = 0;
453 
454  DBG("sg mapping %d elements:\n", nelems);
455 
456  max_seg_size = dma_get_max_seg_size(dev);
457  for_each_sg(sglist, s, nelems, i) {
458  unsigned long vaddr, npages, entry, slen;
459 
460  slen = s->length;
461  /* Sanity check */
462  if (slen == 0) {
463  dma_next = 0;
464  continue;
465  }
466  /* Allocate iommu entries for that segment */
467  vaddr = (unsigned long) sg_virt(s);
468  npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE);
469  align = 0;
470  if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && slen >= PAGE_SIZE &&
471  (vaddr & ~PAGE_MASK) == 0)
472  align = PAGE_SHIFT - IOMMU_PAGE_SHIFT;
473  entry = iommu_range_alloc(dev, tbl, npages, &handle,
474  mask >> IOMMU_PAGE_SHIFT, align);
475 
476  DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
477 
478  /* Handle failure */
479  if (unlikely(entry == DMA_ERROR_CODE)) {
480  if (printk_ratelimit())
481  dev_info(dev, "iommu_alloc failed, tbl %p "
482  "vaddr %lx npages %lu\n", tbl, vaddr,
483  npages);
484  goto failure;
485  }
486 
487  /* Convert entry to a dma_addr_t */
488  entry += tbl->it_offset;
489  dma_addr = entry << IOMMU_PAGE_SHIFT;
490  dma_addr |= (s->offset & ~IOMMU_PAGE_MASK);
491 
492  DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n",
493  npages, entry, dma_addr);
494 
495  /* Insert into HW table */
496  build_fail = ppc_md.tce_build(tbl, entry, npages,
497  vaddr & IOMMU_PAGE_MASK,
498  direction, attrs);
499  if(unlikely(build_fail))
500  goto failure;
501 
502  /* If we are in an open segment, try merging */
503  if (segstart != s) {
504  DBG(" - trying merge...\n");
505  /* We cannot merge if:
506  * - allocated dma_addr isn't contiguous to previous allocation
507  */
508  if (novmerge || (dma_addr != dma_next) ||
509  (outs->dma_length + s->length > max_seg_size)) {
510  /* Can't merge: create a new segment */
511  segstart = s;
512  outcount++;
513  outs = sg_next(outs);
514  DBG(" can't merge, new segment.\n");
515  } else {
516  outs->dma_length += s->length;
517  DBG(" merged, new len: %ux\n", outs->dma_length);
518  }
519  }
520 
521  if (segstart == s) {
522  /* This is a new segment, fill entries */
523  DBG(" - filling new segment.\n");
524  outs->dma_address = dma_addr;
525  outs->dma_length = slen;
526  }
527 
528  /* Calculate next page pointer for contiguous check */
529  dma_next = dma_addr + slen;
530 
531  DBG(" - dma next is: %lx\n", dma_next);
532  }
533 
534  /* Flush/invalidate TLB caches if necessary */
535  if (ppc_md.tce_flush)
536  ppc_md.tce_flush(tbl);
537 
538  DBG("mapped %d elements:\n", outcount);
539 
540  /* For the sake of iommu_unmap_sg, we clear out the length in the
541  * next entry of the sglist if we didn't fill the list completely
542  */
543  if (outcount < incount) {
544  outs = sg_next(outs);
545  outs->dma_address = DMA_ERROR_CODE;
546  outs->dma_length = 0;
547  }
548 
549  /* Make sure updates are seen by hardware */
550  mb();
551 
552  return outcount;
553 
554  failure:
555  for_each_sg(sglist, s, nelems, i) {
556  if (s->dma_length != 0) {
557  unsigned long vaddr, npages;
558 
559  vaddr = s->dma_address & IOMMU_PAGE_MASK;
560  npages = iommu_num_pages(s->dma_address, s->dma_length,
561  IOMMU_PAGE_SIZE);
562  __iommu_free(tbl, vaddr, npages);
563  s->dma_address = DMA_ERROR_CODE;
564  s->dma_length = 0;
565  }
566  if (s == outs)
567  break;
568  }
569  return 0;
570 }
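For context, a caller of iommu_map_sg() looks roughly like the sketch below (illustrative only; the wrapper name is made up, and the table lookup via get_iommu_table_base() mirrors the powerpc dma_iommu_ops). The return value is the number of merged segments, or 0 on failure.

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <asm/dma-mapping.h>	/* get_iommu_table_base() */
#include <asm/iommu.h>

/* Hypothetical wrapper: map a scatterlist through the device's IOMMU table. */
static int example_map_sg(struct device *dev, struct scatterlist *sgl,
			  int nelems, enum dma_data_direction dir)
{
	/* Fall back to a 32-bit mask if the driver never set one. */
	unsigned long mask = dev->dma_mask ? *dev->dma_mask : 0xffffffffUL;

	return iommu_map_sg(dev, get_iommu_table_base(dev), sgl, nelems,
			    mask, dir, NULL);
}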
571 
572 
573 void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
574  int nelems, enum dma_data_direction direction,
575  struct dma_attrs *attrs)
576 {
577  struct scatterlist *sg;
578 
579  BUG_ON(direction == DMA_NONE);
580 
581  if (!tbl)
582  return;
583 
584  sg = sglist;
585  while (nelems--) {
586  unsigned int npages;
587  dma_addr_t dma_handle = sg->dma_address;
588
589  if (sg->dma_length == 0)
590  break;
591  npages = iommu_num_pages(dma_handle, sg->dma_length,
592  IOMMU_PAGE_SIZE);
593  __iommu_free(tbl, dma_handle, npages);
594  sg = sg_next(sg);
595  }
596 
597  /* Flush/invalidate TLBs if necessary. As for iommu_free(), we
598  * do not do an mb() here, the affected platforms do not need it
599  * when freeing.
600  */
601  if (ppc_md.tce_flush)
602  ppc_md.tce_flush(tbl);
603 }
604 
605 static void iommu_table_clear(struct iommu_table *tbl)
606 {
607  /*
608  * With firmware-assisted dump, the system goes through a clean
609  * reboot at the time of a crash, so it is safe to clear the
610  * TCE entries when firmware-assisted dump is active.
611  */
612  if (!is_kdump_kernel() || is_fadump_active()) {
613  /* Clear the table in case firmware left allocations in it */
614  ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
615  return;
616  }
617 
618 #ifdef CONFIG_CRASH_DUMP
619  if (ppc_md.tce_get) {
620  unsigned long index, tceval, tcecount = 0;
621 
622  /* Reserve the existing mappings left by the first kernel. */
623  for (index = 0; index < tbl->it_size; index++) {
624  tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
625  /*
626  * Freed TCE entry contains 0x7fffffffffffffff on JS20
627  */
628  if (tceval && (tceval != 0x7fffffffffffffffUL)) {
629  __set_bit(index, tbl->it_map);
630  tcecount++;
631  }
632  }
633 
634  if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
635  printk(KERN_WARNING "TCE table is full; freeing ");
636  printk(KERN_WARNING "%d entries for the kdump boot\n",
637  KDUMP_MIN_TCE_ENTRIES);
638  for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
639  index < tbl->it_size; index++)
640  __clear_bit(index, tbl->it_map);
641  }
642  }
643 #endif
644 }
645 
646 /*
647  * Build an iommu_table structure. This contains a bitmap which
648  * is used to manage allocation of the tce space.
649  */
650 struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
651 {
652  unsigned long sz;
653  static int welcomed = 0;
654  struct page *page;
655  unsigned int i;
656  struct iommu_pool *p;
657 
658  /* number of bytes needed for the bitmap */
659  sz = (tbl->it_size + 7) >> 3;
660 
661  page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz));
662  if (!page)
663  panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
664  tbl->it_map = page_address(page);
665  memset(tbl->it_map, 0, sz);
666 
667  /*
668  * Reserve page 0 so it will not be used for any mappings.
669  * This keeps buggy drivers that consider page 0 to be invalid
670  * from crashing the machine or even losing data.
671  */
672  if (tbl->it_offset == 0)
673  set_bit(0, tbl->it_map);
674 
675  /* We only split the IOMMU table if we have 1GB or more of space */
676  if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024))
677  tbl->nr_pools = IOMMU_NR_POOLS;
678  else
679  tbl->nr_pools = 1;
680 
681  /* We reserve the top 1/4 of the table for large allocations */
682  tbl->poolsize = (tbl->it_size * 3 / 4) / tbl->nr_pools;
683 
684  for (i = 0; i < tbl->nr_pools; i++) {
685  p = &tbl->pools[i];
686  spin_lock_init(&(p->lock));
687  p->start = tbl->poolsize * i;
688  p->hint = p->start;
689  p->end = p->start + tbl->poolsize;
690  }
691 
692  p = &tbl->large_pool;
693  spin_lock_init(&(p->lock));
694  p->start = tbl->poolsize * i;
695  p->hint = p->start;
696  p->end = tbl->it_size;
697 
698  iommu_table_clear(tbl);
699 
700  if (!welcomed) {
701  printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
702  novmerge ? "disabled" : "enabled");
703  welcomed = 1;
704  }
705 
706  return tbl;
707 }
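As a usage sketch (all names and values below are hypothetical, not from this file): a platform back-end fills in the geometry of a struct iommu_table and then calls iommu_init_table() to get the bitmap and pools set up as above; a real back-end would also set it_base/it_index for its tce_build/tce_free hooks.

#include <asm/iommu.h>

static struct iommu_table example_tbl;	/* hypothetical 256MB DMA window at bus address 0 */

static struct iommu_table *example_setup_window(int nid)
{
	example_tbl.it_busno     = 0;
	example_tbl.it_offset    = 0;	/* first TCE maps bus address 0 */
	example_tbl.it_size      = (256UL << 20) >> IOMMU_PAGE_SHIFT;	/* entries */
	example_tbl.it_blocksize = 16;	/* granularity used when bumping pool->hint */

	return iommu_init_table(&example_tbl, nid);
}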
708 
709 void iommu_free_table(struct iommu_table *tbl, const char *node_name)
710 {
711  unsigned long bitmap_sz, i;
712  unsigned int order;
713 
714  if (!tbl || !tbl->it_map) {
715  printk(KERN_ERR "%s: expected TCE map for %s\n", __func__,
716  node_name);
717  return;
718  }
719 
720  /* verify that table contains no entries */
721  /* it_size is in entries, and we're examining 64 at a time */
722  for (i = 0; i < (tbl->it_size/64); i++) {
723  if (tbl->it_map[i] != 0) {
724  printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
725  __func__, node_name);
726  break;
727  }
728  }
729 
730  /* calculate bitmap size in bytes */
731  bitmap_sz = (tbl->it_size + 7) / 8;
732 
733  /* free bitmap */
734  order = get_order(bitmap_sz);
735  free_pages((unsigned long) tbl->it_map, order);
736 
737  /* free table */
738  kfree(tbl);
739 }
740 
741 /* Creates TCEs for a user provided buffer. The user buffer must be
742  * contiguous real kernel storage (not vmalloc). The address passed here
743  * comprises a page address and offset into that page. The dma_addr_t
744  * returned will point to the same byte within the page as was passed in.
745  */
746 dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
747  struct page *page, unsigned long offset, size_t size,
748  unsigned long mask, enum dma_data_direction direction,
749  struct dma_attrs *attrs)
750 {
751  dma_addr_t dma_handle = DMA_ERROR_CODE;
752  void *vaddr;
753  unsigned long uaddr;
754  unsigned int npages, align;
755 
756  BUG_ON(direction == DMA_NONE);
757 
758  vaddr = page_address(page) + offset;
759  uaddr = (unsigned long)vaddr;
760  npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE);
761 
762  if (tbl) {
763  align = 0;
764  if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && size >= PAGE_SIZE &&
765  ((unsigned long)vaddr & ~PAGE_MASK) == 0)
766  align = PAGE_SHIFT - IOMMU_PAGE_SHIFT;
767 
768  dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
769  mask >> IOMMU_PAGE_SHIFT, align,
770  attrs);
771  if (dma_handle == DMA_ERROR_CODE) {
772  if (printk_ratelimit()) {
773  dev_info(dev, "iommu_alloc failed, tbl %p "
774  "vaddr %p npages %d\n", tbl, vaddr,
775  npages);
776  }
777  } else
778  dma_handle |= (uaddr & ~IOMMU_PAGE_MASK);
779  }
780 
781  return dma_handle;
782 }
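A typical caller looks like the sketch below (illustrative only; the helper name is made up, and the table lookup mirrors the powerpc dma_iommu_ops). The result must be checked against DMA_ERROR_CODE and eventually released with iommu_unmap_page().

#include <linux/dma-mapping.h>
#include <linux/mm.h>		/* virt_to_page(), offset_in_page() */
#include <asm/dma-mapping.h>	/* get_iommu_table_base() */
#include <asm/iommu.h>

/* Hypothetical helper: map a (non-vmalloc) kernel buffer for DMA to the device. */
static dma_addr_t example_map_buf(struct device *dev, void *buf, size_t len)
{
	unsigned long mask = dev->dma_mask ? *dev->dma_mask : 0xffffffffUL;

	return iommu_map_page(dev, get_iommu_table_base(dev),
			      virt_to_page(buf), offset_in_page(buf), len,
			      mask, DMA_TO_DEVICE, NULL);
}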
783 
784 void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
785  size_t size, enum dma_data_direction direction,
786  struct dma_attrs *attrs)
787 {
788  unsigned int npages;
789 
790  BUG_ON(direction == DMA_NONE);
791 
792  if (tbl) {
793  npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE);
794  iommu_free(tbl, dma_handle, npages);
795  }
796 }
797 
798 /* Allocates a contiguous real buffer and creates mappings over it.
799  * Returns the virtual address of the buffer and sets dma_handle
800  * to the dma address (mapping) of the first page.
801  */
802 void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
803  size_t size, dma_addr_t *dma_handle,
804  unsigned long mask, gfp_t flag, int node)
805 {
806  void *ret = NULL;
807  dma_addr_t mapping;
808  unsigned int order;
809  unsigned int nio_pages, io_order;
810  struct page *page;
811 
812  size = PAGE_ALIGN(size);
813  order = get_order(size);
814 
815  /*
816  * Client asked for way too much space. This is checked later
817  * anyway. It is easier to debug here for the drivers than in
818  * the tce tables.
819  */
820  if (order >= IOMAP_MAX_ORDER) {
821  dev_info(dev, "iommu_alloc_consistent size too large: 0x%lx\n",
822  size);
823  return NULL;
824  }
825 
826  if (!tbl)
827  return NULL;
828 
829  /* Alloc enough pages (and possibly more) */
830  page = alloc_pages_node(node, flag, order);
831  if (!page)
832  return NULL;
833  ret = page_address(page);
834  memset(ret, 0, size);
835 
836  /* Set up tces to cover the allocated range */
837  nio_pages = size >> IOMMU_PAGE_SHIFT;
838  io_order = get_iommu_order(size);
839  mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
840  mask >> IOMMU_PAGE_SHIFT, io_order, NULL);
841  if (mapping == DMA_ERROR_CODE) {
842  free_pages((unsigned long)ret, order);
843  return NULL;
844  }
845  *dma_handle = mapping;
846  return ret;
847 }
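A minimal caller sketch (hypothetical wrapper, modelled on the powerpc dma_iommu_ops coherent path): the buffer comes back zeroed, *handle holds the bus address, and the pair is later released with iommu_free_coherent().

#include <linux/dma-mapping.h>
#include <asm/dma-mapping.h>	/* get_iommu_table_base() */
#include <asm/iommu.h>

/* Hypothetical wrapper: allocate a zeroed, IOMMU-mapped buffer for the device. */
static void *example_alloc_coherent(struct device *dev, size_t size,
				    dma_addr_t *handle, gfp_t flag)
{
	return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
				    handle, dev->coherent_dma_mask, flag,
				    dev_to_node(dev));
}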
848 
849 void iommu_free_coherent(struct iommu_table *tbl, size_t size,
850  void *vaddr, dma_addr_t dma_handle)
851 {
852  if (tbl) {
853  unsigned int nio_pages;
854 
855  size = PAGE_ALIGN(size);
856  nio_pages = size >> IOMMU_PAGE_SHIFT;
857  iommu_free(tbl, dma_handle, nio_pages);
858  size = PAGE_ALIGN(size);
859  free_pages((unsigned long)vaddr, get_order(size));
860  }
861 }