Linux Kernel 3.7.1
iommu.c
/*
 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <joerg.roedel@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#define pr_fmt(fmt) "%s: " fmt, __func__

#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/iommu.h>
#include <linux/idr.h>
#include <linux/notifier.h>
#include <linux/err.h>

static struct kset *iommu_group_kset;
static struct ida iommu_group_ida;
static struct mutex iommu_group_mutex;

struct iommu_group {
        struct kobject kobj;
        struct kobject *devices_kobj;
        struct list_head devices;
        struct mutex mutex;
        struct blocking_notifier_head notifier;
        void *iommu_data;
        void (*iommu_data_release)(void *iommu_data);
        char *name;
        int id;
};

struct iommu_device {
        struct list_head list;
        struct device *dev;
        char *name;
};

struct iommu_group_attribute {
        struct attribute attr;
        ssize_t (*show)(struct iommu_group *group, char *buf);
        ssize_t (*store)(struct iommu_group *group,
                         const char *buf, size_t count);
};

#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)          \
struct iommu_group_attribute iommu_group_attr_##_name =        \
        __ATTR(_name, _mode, _show, _store)

#define to_iommu_group_attr(_attr)      \
        container_of(_attr, struct iommu_group_attribute, attr)
#define to_iommu_group(_kobj)           \
        container_of(_kobj, struct iommu_group, kobj)

static ssize_t iommu_group_attr_show(struct kobject *kobj,
                                     struct attribute *__attr, char *buf)
{
        struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
        struct iommu_group *group = to_iommu_group(kobj);
        ssize_t ret = -EIO;

        if (attr->show)
                ret = attr->show(group, buf);
        return ret;
}

static ssize_t iommu_group_attr_store(struct kobject *kobj,
                                      struct attribute *__attr,
                                      const char *buf, size_t count)
{
        struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
        struct iommu_group *group = to_iommu_group(kobj);
        ssize_t ret = -EIO;

        if (attr->store)
                ret = attr->store(group, buf, count);
        return ret;
}

static const struct sysfs_ops iommu_group_sysfs_ops = {
        .show = iommu_group_attr_show,
        .store = iommu_group_attr_store,
};

static int iommu_group_create_file(struct iommu_group *group,
                                   struct iommu_group_attribute *attr)
{
        return sysfs_create_file(&group->kobj, &attr->attr);
}

static void iommu_group_remove_file(struct iommu_group *group,
                                    struct iommu_group_attribute *attr)
{
        sysfs_remove_file(&group->kobj, &attr->attr);
}

static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
{
        return sprintf(buf, "%s\n", group->name);
}

static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);

static void iommu_group_release(struct kobject *kobj)
{
        struct iommu_group *group = to_iommu_group(kobj);

        if (group->iommu_data_release)
                group->iommu_data_release(group->iommu_data);

        mutex_lock(&iommu_group_mutex);
        ida_remove(&iommu_group_ida, group->id);
        mutex_unlock(&iommu_group_mutex);

        kfree(group->name);
        kfree(group);
}

static struct kobj_type iommu_group_ktype = {
        .sysfs_ops = &iommu_group_sysfs_ops,
        .release = iommu_group_release,
};

struct iommu_group *iommu_group_alloc(void)
{
        struct iommu_group *group;
        int ret;

        group = kzalloc(sizeof(*group), GFP_KERNEL);
        if (!group)
                return ERR_PTR(-ENOMEM);

        group->kobj.kset = iommu_group_kset;
        mutex_init(&group->mutex);
        INIT_LIST_HEAD(&group->devices);
        BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);

        mutex_lock(&iommu_group_mutex);

again:
        if (unlikely(0 == ida_pre_get(&iommu_group_ida, GFP_KERNEL))) {
                kfree(group);
                mutex_unlock(&iommu_group_mutex);
                return ERR_PTR(-ENOMEM);
        }

        if (-EAGAIN == ida_get_new(&iommu_group_ida, &group->id))
                goto again;

        mutex_unlock(&iommu_group_mutex);

        ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
                                   NULL, "%d", group->id);
        if (ret) {
                mutex_lock(&iommu_group_mutex);
                ida_remove(&iommu_group_ida, group->id);
                mutex_unlock(&iommu_group_mutex);
                kfree(group);
                return ERR_PTR(ret);
        }

        group->devices_kobj = kobject_create_and_add("devices", &group->kobj);
        if (!group->devices_kobj) {
                kobject_put(&group->kobj); /* triggers .release & free */
                return ERR_PTR(-ENOMEM);
        }

        /*
         * The devices_kobj holds a reference on the group kobject, so
         * as long as that exists so will the group.  We can therefore
         * use the devices_kobj for reference counting.
         */
        kobject_put(&group->kobj);

        return group;
}
EXPORT_SYMBOL_GPL(iommu_group_alloc);

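/*
 * Usage sketch (illustrative): an IOMMU driver's ->add_device() callback
 * would typically allocate a group and place the device in it roughly as
 * below.  The names my_driver_add_device and grp are hypothetical.
 *
 *	static int my_driver_add_device(struct device *dev)
 *	{
 *		struct iommu_group *grp = iommu_group_alloc();
 *		int ret;
 *
 *		if (IS_ERR(grp))
 *			return PTR_ERR(grp);
 *
 *		ret = iommu_group_add_device(grp, dev);
 *		iommu_group_put(grp);	// drop iommu_group_alloc()'s reference
 *		return ret;
 *	}
 */
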
void *iommu_group_get_iommudata(struct iommu_group *group)
{
        return group->iommu_data;
}
EXPORT_SYMBOL_GPL(iommu_group_get_iommudata);

void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data,
                               void (*release)(void *iommu_data))
{
        group->iommu_data = iommu_data;
        group->iommu_data_release = release;
}
EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);

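/*
 * Usage sketch (illustrative): a driver can hang per-group state off the
 * group and have it freed automatically when the group is released.  The
 * names struct my_group_data and my_group_data_release are hypothetical.
 *
 *	struct my_group_data { int stream_id; };
 *
 *	static void my_group_data_release(void *data)
 *	{
 *		kfree(data);
 *	}
 *
 *	// after allocating the group:
 *	//	data = kzalloc(sizeof(*data), GFP_KERNEL);
 *	//	iommu_group_set_iommudata(grp, data, my_group_data_release);
 *	// later, from any code holding a group reference:
 *	//	data = iommu_group_get_iommudata(grp);
 */
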
int iommu_group_set_name(struct iommu_group *group, const char *name)
{
        int ret;

        if (group->name) {
                iommu_group_remove_file(group, &iommu_group_attr_name);
                kfree(group->name);
                group->name = NULL;
                if (!name)
                        return 0;
        }

        group->name = kstrdup(name, GFP_KERNEL);
        if (!group->name)
                return -ENOMEM;

        ret = iommu_group_create_file(group, &iommu_group_attr_name);
        if (ret) {
                kfree(group->name);
                group->name = NULL;
                return ret;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(iommu_group_set_name);

int iommu_group_add_device(struct iommu_group *group, struct device *dev)
{
        int ret, i = 0;
        struct iommu_device *device;

        device = kzalloc(sizeof(*device), GFP_KERNEL);
        if (!device)
                return -ENOMEM;

        device->dev = dev;

        ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group");
        if (ret) {
                kfree(device);
                return ret;
        }

        device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj));
rename:
        if (!device->name) {
                sysfs_remove_link(&dev->kobj, "iommu_group");
                kfree(device);
                return -ENOMEM;
        }

        ret = sysfs_create_link_nowarn(group->devices_kobj,
                                       &dev->kobj, device->name);
        if (ret) {
                kfree(device->name);
                if (ret == -EEXIST && i >= 0) {
                        /*
                         * Account for the slim chance of collision
                         * and append an instance to the name.
                         */
                        device->name = kasprintf(GFP_KERNEL, "%s.%d",
                                                 kobject_name(&dev->kobj), i++);
                        goto rename;
                }

                sysfs_remove_link(&dev->kobj, "iommu_group");
                kfree(device);
                return ret;
        }

        kobject_get(group->devices_kobj);

        dev->iommu_group = group;

        mutex_lock(&group->mutex);
        list_add_tail(&device->list, &group->devices);
        mutex_unlock(&group->mutex);

        /* Notify any listeners about change to group. */
        blocking_notifier_call_chain(&group->notifier,
                                     IOMMU_GROUP_NOTIFY_ADD_DEVICE, dev);
        return 0;
}
EXPORT_SYMBOL_GPL(iommu_group_add_device);

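/*
 * Resulting sysfs layout (illustrative; actual names depend on the group
 * id, the group name, and the devices added):
 *
 *	/sys/kernel/iommu_groups/<id>/			the group kobject
 *	/sys/kernel/iommu_groups/<id>/name		optional group name
 *	/sys/kernel/iommu_groups/<id>/devices/<dev>	link to each member
 *	/sys/devices/.../<dev>/iommu_group		link back to the group
 */
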
void iommu_group_remove_device(struct device *dev)
{
        struct iommu_group *group = dev->iommu_group;
        struct iommu_device *tmp_device, *device = NULL;

        /* Pre-notify listeners that a device is being removed. */
        blocking_notifier_call_chain(&group->notifier,
                                     IOMMU_GROUP_NOTIFY_DEL_DEVICE, dev);

        mutex_lock(&group->mutex);
        list_for_each_entry(tmp_device, &group->devices, list) {
                if (tmp_device->dev == dev) {
                        device = tmp_device;
                        list_del(&device->list);
                        break;
                }
        }
        mutex_unlock(&group->mutex);

        if (!device)
                return;

        sysfs_remove_link(group->devices_kobj, device->name);
        sysfs_remove_link(&dev->kobj, "iommu_group");

        kfree(device->name);
        kfree(device);
        dev->iommu_group = NULL;
        kobject_put(group->devices_kobj);
}
EXPORT_SYMBOL_GPL(iommu_group_remove_device);

int iommu_group_for_each_dev(struct iommu_group *group, void *data,
                             int (*fn)(struct device *, void *))
{
        struct iommu_device *device;
        int ret = 0;

        mutex_lock(&group->mutex);
        list_for_each_entry(device, &group->devices, list) {
                ret = fn(device->dev, data);
                if (ret)
                        break;
        }
        mutex_unlock(&group->mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);

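/*
 * Usage sketch (illustrative): callers pass a callback that is invoked for
 * every device in the group while the group mutex is held.  The callback
 * name count_dev and the snippet below are hypothetical.
 *
 *	static int count_dev(struct device *dev, void *data)
 *	{
 *		(*(int *)data)++;
 *		return 0;		// a non-zero return stops the walk
 *	}
 *
 *	int ndev = 0;
 *	iommu_group_for_each_dev(group, &ndev, count_dev);
 */
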
struct iommu_group *iommu_group_get(struct device *dev)
{
        struct iommu_group *group = dev->iommu_group;

        if (group)
                kobject_get(group->devices_kobj);

        return group;
}
EXPORT_SYMBOL_GPL(iommu_group_get);

void iommu_group_put(struct iommu_group *group)
{
        if (group)
                kobject_put(group->devices_kobj);
}
EXPORT_SYMBOL_GPL(iommu_group_put);

int iommu_group_register_notifier(struct iommu_group *group,
                                  struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&group->notifier, nb);
}
EXPORT_SYMBOL_GPL(iommu_group_register_notifier);

int iommu_group_unregister_notifier(struct iommu_group *group,
                                    struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&group->notifier, nb);
}
EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier);

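/*
 * Usage sketch (illustrative): a consumer such as a device-assignment
 * driver can watch group membership changes by registering a notifier on
 * the group.  The names my_group_notify and my_nb are hypothetical.
 *
 *	static int my_group_notify(struct notifier_block *nb,
 *				   unsigned long action, void *data)
 *	{
 *		struct device *dev = data;
 *
 *		if (action == IOMMU_GROUP_NOTIFY_ADD_DEVICE)
 *			dev_info(dev, "joined iommu group\n");
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_nb = { .notifier_call = my_group_notify };
 *	// iommu_group_register_notifier(group, &my_nb);
 */
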
int iommu_group_id(struct iommu_group *group)
{
        return group->id;
}
EXPORT_SYMBOL_GPL(iommu_group_id);

static int add_iommu_group(struct device *dev, void *data)
{
        struct iommu_ops *ops = data;

        if (!ops->add_device)
                return -ENODEV;

        WARN_ON(dev->iommu_group);

        ops->add_device(dev);

        return 0;
}

static int iommu_bus_notifier(struct notifier_block *nb,
                              unsigned long action, void *data)
{
        struct device *dev = data;
        struct iommu_ops *ops = dev->bus->iommu_ops;
        struct iommu_group *group;
        unsigned long group_action = 0;

        /*
         * ADD/DEL call into iommu driver ops if provided, which may
         * result in ADD/DEL notifiers to group->notifier
         */
        if (action == BUS_NOTIFY_ADD_DEVICE) {
                if (ops->add_device)
                        return ops->add_device(dev);
        } else if (action == BUS_NOTIFY_DEL_DEVICE) {
                if (ops->remove_device && dev->iommu_group) {
                        ops->remove_device(dev);
                        return 0;
                }
        }

        /*
         * Remaining BUS_NOTIFYs get filtered and republished to the
         * group, if anyone is listening
         */
        group = iommu_group_get(dev);
        if (!group)
                return 0;

        switch (action) {
        case BUS_NOTIFY_BIND_DRIVER:
                group_action = IOMMU_GROUP_NOTIFY_BIND_DRIVER;
                break;
        case BUS_NOTIFY_BOUND_DRIVER:
                group_action = IOMMU_GROUP_NOTIFY_BOUND_DRIVER;
                break;
        case BUS_NOTIFY_UNBIND_DRIVER:
                group_action = IOMMU_GROUP_NOTIFY_UNBIND_DRIVER;
                break;
        case BUS_NOTIFY_UNBOUND_DRIVER:
                group_action = IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER;
                break;
        }

        if (group_action)
                blocking_notifier_call_chain(&group->notifier,
                                             group_action, dev);

        iommu_group_put(group);
        return 0;
}

static struct notifier_block iommu_bus_nb = {
        .notifier_call = iommu_bus_notifier,
};

static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops)
{
        bus_register_notifier(bus, &iommu_bus_nb);
        bus_for_each_dev(bus, NULL, ops, add_iommu_group);
}

int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops)
{
        if (bus->iommu_ops != NULL)
                return -EBUSY;

        bus->iommu_ops = ops;

        /* Do IOMMU specific setup for this bus-type */
        iommu_bus_init(bus, ops);

        return 0;
}
EXPORT_SYMBOL_GPL(bus_set_iommu);

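/*
 * Usage sketch (illustrative): an IOMMU driver registers its callbacks for
 * a whole bus type once, typically at init time.  my_iommu_ops and the
 * my_* callbacks are hypothetical; pci_bus_type is the usual bus for x86
 * IOMMUs.
 *
 *	static struct iommu_ops my_iommu_ops = {
 *		.domain_init	= my_domain_init,
 *		.attach_dev	= my_attach_dev,
 *		.detach_dev	= my_detach_dev,
 *		.map		= my_map,
 *		.unmap		= my_unmap,
 *		.iova_to_phys	= my_iova_to_phys,
 *		.add_device	= my_add_device,
 *		.pgsize_bitmap	= SZ_4K | SZ_2M,
 *	};
 *
 *	// bus_set_iommu(&pci_bus_type, &my_iommu_ops);
 */
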
bool iommu_present(struct bus_type *bus)
{
        return bus->iommu_ops != NULL;
}
EXPORT_SYMBOL_GPL(iommu_present);

void iommu_set_fault_handler(struct iommu_domain *domain,
                             iommu_fault_handler_t handler,
                             void *token)
{
        BUG_ON(!domain);

        domain->handler = handler;
        domain->handler_token = token;
}
EXPORT_SYMBOL_GPL(iommu_set_fault_handler);

struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
{
        struct iommu_domain *domain;
        int ret;

        if (bus == NULL || bus->iommu_ops == NULL)
                return NULL;

        domain = kzalloc(sizeof(*domain), GFP_KERNEL);
        if (!domain)
                return NULL;

        domain->ops = bus->iommu_ops;

        ret = domain->ops->domain_init(domain);
        if (ret)
                goto out_free;

        return domain;

out_free:
        kfree(domain);

        return NULL;
}
EXPORT_SYMBOL_GPL(iommu_domain_alloc);

void iommu_domain_free(struct iommu_domain *domain)
{
        if (likely(domain->ops->domain_destroy != NULL))
                domain->ops->domain_destroy(domain);

        kfree(domain);
}
EXPORT_SYMBOL_GPL(iommu_domain_free);

int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
{
        if (unlikely(domain->ops->attach_dev == NULL))
                return -ENODEV;

        return domain->ops->attach_dev(domain, dev);
}
EXPORT_SYMBOL_GPL(iommu_attach_device);

void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
{
        if (unlikely(domain->ops->detach_dev == NULL))
                return;

        domain->ops->detach_dev(domain, dev);
}
EXPORT_SYMBOL_GPL(iommu_detach_device);

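/*
 * Usage sketch (illustrative): the typical consumer flow for the domain
 * API.  dom, dev and ret are hypothetical locals.
 *
 *	struct iommu_domain *dom;
 *
 *	if (!iommu_present(&pci_bus_type))
 *		return -ENODEV;
 *
 *	dom = iommu_domain_alloc(&pci_bus_type);
 *	if (!dom)
 *		return -ENOMEM;
 *
 *	ret = iommu_attach_device(dom, dev);
 *	// ... iommu_map()/iommu_unmap() on dom ...
 *	iommu_detach_device(dom, dev);
 *	iommu_domain_free(dom);
 */
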
/*
 * IOMMU groups are really the natural working unit of the IOMMU, but
 * the IOMMU API works on domains and devices.  Bridge that gap by
 * iterating over the devices in a group.  Ideally we'd have a single
 * device which represents the requestor ID of the group, but we also
 * allow IOMMU drivers to create policy-defined minimum sets, where
 * the physical hardware may be able to distinguish members, but we
 * wish to group them at a higher level (ex. untrusted multi-function
 * PCI devices).  Thus we attach each device.
 */
static int iommu_group_do_attach_device(struct device *dev, void *data)
{
        struct iommu_domain *domain = data;

        return iommu_attach_device(domain, dev);
}

int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
{
        return iommu_group_for_each_dev(group, domain,
                                        iommu_group_do_attach_device);
}
EXPORT_SYMBOL_GPL(iommu_attach_group);

static int iommu_group_do_detach_device(struct device *dev, void *data)
{
        struct iommu_domain *domain = data;

        iommu_detach_device(domain, dev);

        return 0;
}

void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
{
        iommu_group_for_each_dev(group, domain, iommu_group_do_detach_device);
}
EXPORT_SYMBOL_GPL(iommu_detach_group);

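/*
 * Usage sketch (illustrative): a consumer that owns a whole group (e.g.
 * for device assignment) attaches the group rather than an individual
 * device, per the comment above.  grp, dom, dev and ret are hypothetical.
 *
 *	grp = iommu_group_get(dev);
 *	if (grp) {
 *		ret = iommu_attach_group(dom, grp);
 *		// ... use the domain ...
 *		iommu_detach_group(dom, grp);
 *		iommu_group_put(grp);
 *	}
 */
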
phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
                               unsigned long iova)
{
        if (unlikely(domain->ops->iova_to_phys == NULL))
                return 0;

        return domain->ops->iova_to_phys(domain, iova);
}
EXPORT_SYMBOL_GPL(iommu_iova_to_phys);

int iommu_domain_has_cap(struct iommu_domain *domain,
                         unsigned long cap)
{
        if (unlikely(domain->ops->domain_has_cap == NULL))
                return 0;

        return domain->ops->domain_has_cap(domain, cap);
}
EXPORT_SYMBOL_GPL(iommu_domain_has_cap);

int iommu_map(struct iommu_domain *domain, unsigned long iova,
              phys_addr_t paddr, size_t size, int prot)
{
        unsigned long orig_iova = iova;
        unsigned int min_pagesz;
        size_t orig_size = size;
        int ret = 0;

        if (unlikely(domain->ops->map == NULL))
                return -ENODEV;

        /* find out the minimum page size supported */
        min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);

        /*
         * both the virtual address and the physical one, as well as
         * the size of the mapping, must be aligned (at least) to the
         * size of the smallest page supported by the hardware
         */
        if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
                pr_err("unaligned: iova 0x%lx pa 0x%lx size 0x%lx min_pagesz "
                       "0x%x\n", iova, (unsigned long)paddr,
                       (unsigned long)size, min_pagesz);
                return -EINVAL;
        }

        pr_debug("map: iova 0x%lx pa 0x%lx size 0x%lx\n", iova,
                 (unsigned long)paddr, (unsigned long)size);

        while (size) {
                unsigned long pgsize, addr_merge = iova | paddr;
                unsigned int pgsize_idx;

                /* Max page size that still fits into 'size' */
                pgsize_idx = __fls(size);

                /* need to consider alignment requirements ? */
                if (likely(addr_merge)) {
                        /* Max page size allowed by both iova and paddr */
                        unsigned int align_pgsize_idx = __ffs(addr_merge);

                        pgsize_idx = min(pgsize_idx, align_pgsize_idx);
                }

                /* build a mask of acceptable page sizes */
                pgsize = (1UL << (pgsize_idx + 1)) - 1;

                /* throw away page sizes not supported by the hardware */
                pgsize &= domain->ops->pgsize_bitmap;

                /* make sure we're still sane */
                BUG_ON(!pgsize);

                /* pick the biggest page */
                pgsize_idx = __fls(pgsize);
                pgsize = 1UL << pgsize_idx;

                pr_debug("mapping: iova 0x%lx pa 0x%lx pgsize %lu\n", iova,
                         (unsigned long)paddr, pgsize);

                ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
                if (ret)
                        break;

                iova += pgsize;
                paddr += pgsize;
                size -= pgsize;
        }

        /* unroll mapping in case something went wrong */
        if (ret)
                iommu_unmap(domain, orig_iova, orig_size - size);

        return ret;
}
EXPORT_SYMBOL_GPL(iommu_map);

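/*
 * Worked example (illustrative): with pgsize_bitmap = SZ_4K | SZ_2M,
 * mapping size 0x201000 at a 2MB-aligned iova/paddr proceeds in two
 * iterations of the loop above:
 *
 *	1. __fls(0x201000) = 21 and the addresses are 2MB aligned, so the
 *	   largest supported candidate is 2MB; ops->map() installs one 2MB
 *	   mapping and iova/paddr/size advance by 2MB.
 *	2. 0x1000 bytes remain, so only a 4KB page qualifies and one 4KB
 *	   mapping is installed.
 *
 * If any ops->map() call fails, the partially mapped range is torn down
 * again via iommu_unmap(domain, orig_iova, orig_size - size).
 */
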
size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
{
        size_t unmapped_page, unmapped = 0;
        unsigned int min_pagesz;

        if (unlikely(domain->ops->unmap == NULL))
                return -ENODEV;

        /* find out the minimum page size supported */
        min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);

        /*
         * The virtual address, as well as the size of the mapping, must be
         * aligned (at least) to the size of the smallest page supported
         * by the hardware
         */
        if (!IS_ALIGNED(iova | size, min_pagesz)) {
                pr_err("unaligned: iova 0x%lx size 0x%lx min_pagesz 0x%x\n",
                       iova, (unsigned long)size, min_pagesz);
                return -EINVAL;
        }

        pr_debug("unmap this: iova 0x%lx size 0x%lx\n", iova,
                 (unsigned long)size);

        /*
         * Keep iterating until we either unmap 'size' bytes (or more)
         * or we hit an area that isn't mapped.
         */
        while (unmapped < size) {
                size_t left = size - unmapped;

                unmapped_page = domain->ops->unmap(domain, iova, left);
                if (!unmapped_page)
                        break;

                pr_debug("unmapped: iova 0x%lx size %lx\n", iova,
                         (unsigned long)unmapped_page);

                iova += unmapped_page;
                unmapped += unmapped_page;
        }

        return unmapped;
}
EXPORT_SYMBOL_GPL(iommu_unmap);

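/*
 * Usage sketch (illustrative): mapping one page of a kernel buffer into a
 * domain and tearing it down again.  buf, dom, ret and MY_IOVA are
 * hypothetical.
 *
 *	phys_addr_t pa = virt_to_phys(buf);
 *
 *	ret = iommu_map(dom, MY_IOVA, pa, PAGE_SIZE,
 *			IOMMU_READ | IOMMU_WRITE);
 *	if (ret)
 *		return ret;
 *
 *	// the device can now DMA to/from MY_IOVA ...
 *
 *	if (iommu_unmap(dom, MY_IOVA, PAGE_SIZE) != PAGE_SIZE)
 *		pr_warn("unexpected unmap size\n");
 */
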
static int __init iommu_init(void)
{
        iommu_group_kset = kset_create_and_add("iommu_groups",
                                               NULL, kernel_kobj);
        ida_init(&iommu_group_ida);
        mutex_init(&iommu_group_mutex);

        BUG_ON(!iommu_group_kset);

        return 0;
}
subsys_initcall(iommu_init);

int iommu_domain_get_attr(struct iommu_domain *domain,
                          enum iommu_attr attr, void *data)
{
        struct iommu_domain_geometry *geometry;
        int ret = 0;

        switch (attr) {
        case DOMAIN_ATTR_GEOMETRY:
                geometry  = data;
                *geometry = domain->geometry;

                break;
        default:
                if (!domain->ops->domain_get_attr)
                        return -EINVAL;

                ret = domain->ops->domain_get_attr(domain, attr, data);
        }

        return ret;
}
EXPORT_SYMBOL_GPL(iommu_domain_get_attr);

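/*
 * Usage sketch (illustrative): querying the addressable range of a domain
 * through the generic attribute interface.  dom and geo are hypothetical.
 *
 *	struct iommu_domain_geometry geo;
 *
 *	if (!iommu_domain_get_attr(dom, DOMAIN_ATTR_GEOMETRY, &geo) &&
 *	    geo.force_aperture)
 *		pr_info("iova window: 0x%llx-0x%llx\n",
 *			(unsigned long long)geo.aperture_start,
 *			(unsigned long long)geo.aperture_end);
 */
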
int iommu_domain_set_attr(struct iommu_domain *domain,
                          enum iommu_attr attr, void *data)
{
        if (!domain->ops->domain_set_attr)
                return -EINVAL;

        return domain->ops->domain_set_attr(domain, attr, data);
}