Linux Kernel 3.7.1
amd_iommu_v2.c
1 /*
2  * Copyright (C) 2010-2012 Advanced Micro Devices, Inc.
3  * Author: Joerg Roedel <[email protected]>
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published
7  * by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17  */
18 
19 #include <linux/mmu_notifier.h>
20 #include <linux/amd-iommu.h>
21 #include <linux/mm_types.h>
22 #include <linux/profile.h>
23 #include <linux/module.h>
24 #include <linux/sched.h>
25 #include <linux/iommu.h>
26 #include <linux/wait.h>
27 #include <linux/pci.h>
28 #include <linux/gfp.h>
29 
30 #include "amd_iommu_types.h"
31 #include "amd_iommu_proto.h"
32 
33 MODULE_LICENSE("GPL v2");
34 MODULE_AUTHOR("Joerg Roedel <[email protected]>");
35 
36 #define MAX_DEVICES 0x10000
37 #define PRI_QUEUE_SIZE 512
38 
39 struct pri_queue {
40  atomic_t inflight;
41  bool finish;
42  int status;
43 };
44 
45 struct pasid_state {
46  struct list_head list; /* For global state-list */
47  atomic_t count; /* Reference count */
48  struct task_struct *task; /* Task bound to this PASID */
49  struct mm_struct *mm; /* mm_struct for the faults */
50  struct mmu_notifier mn; /* mmu_notifier handle */
51  struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */
52  struct device_state *device_state; /* Link to our device_state */
53  int pasid; /* PASID index */
54  spinlock_t lock; /* Protect pri_queues */
55  wait_queue_head_t wq; /* To wait for count == 0 */
56 };
57 
58 struct device_state {
59  atomic_t count;
60  struct pci_dev *pdev;
61  struct pasid_state **states;
62  struct iommu_domain *domain;
63  int pasid_levels;
64  int max_pasids;
65  amd_iommu_invalid_ppr_cb inv_ppr_cb;
66  amd_iommu_invalidate_ctx inv_ctx_cb;
67  spinlock_t lock;
68  wait_queue_head_t wq;
69 };
70 
71 struct fault {
72  struct work_struct work;
73  struct device_state *dev_state;
74  struct pasid_state *state;
75  struct mm_struct *mm;
76  u64 address;
77  u16 devid;
78  u16 pasid;
79  u16 tag;
80  u16 finish;
81  u16 flags;
82 };
83 
84 static struct device_state **state_table;
85 static spinlock_t state_lock;
86 
87 /* List and lock for all pasid_states */
88 static LIST_HEAD(pasid_state_list);
89 static DEFINE_SPINLOCK(ps_lock);
90 
91 static struct workqueue_struct *iommu_wq;
92 
93 /*
94  * Empty page table - Used between
95  * mmu_notifier_invalidate_range_start and
96  * mmu_notifier_invalidate_range_end
97  */
98 static u64 *empty_page_table;
99 
100 static void free_pasid_states(struct device_state *dev_state);
101 static void unbind_pasid(struct device_state *dev_state, int pasid);
102 static int task_exit(struct notifier_block *nb, unsigned long e, void *data);
103 
104 static u16 device_id(struct pci_dev *pdev)
105 {
106  u16 devid;
107 
108  devid = pdev->bus->number;
109  devid = (devid << 8) | pdev->devfn;
110 
111  return devid;
112 }
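/*
 * Editorial note (illustrative sketch, not part of the original file):
 * device_id() builds the 16-bit PCI requester ID the IOMMU uses to index
 * its per-device state - bus number in bits 15:8, devfn (5-bit device,
 * 3-bit function) in bits 7:0.  The same packing in isolation:
 */
static inline u16 example_requester_id(u8 bus, u8 devfn)
{
	/* e.g. bus 0x03, device 5, function 0 (devfn 0x28) -> 0x0328 */
	return ((u16)bus << 8) | devfn;
}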
113 
114 static struct device_state *get_device_state(u16 devid)
115 {
116  struct device_state *dev_state;
117  unsigned long flags;
118 
119  spin_lock_irqsave(&state_lock, flags);
120  dev_state = state_table[devid];
121  if (dev_state != NULL)
122  atomic_inc(&dev_state->count);
123  spin_unlock_irqrestore(&state_lock, flags);
124 
125  return dev_state;
126 }
127 
128 static void free_device_state(struct device_state *dev_state)
129 {
130  /*
131  * First detach device from domain - No more PRI requests will arrive
132  * from that device after it is unbound from the IOMMUv2 domain.
133  */
134  iommu_detach_device(dev_state->domain, &dev_state->pdev->dev);
135 
136  /* Everything is down now, free the IOMMUv2 domain */
137  iommu_domain_free(dev_state->domain);
138 
139  /* Finally get rid of the device-state */
140  kfree(dev_state);
141 }
142 
143 static void put_device_state(struct device_state *dev_state)
144 {
145  if (atomic_dec_and_test(&dev_state->count))
146  wake_up(&dev_state->wq);
147 }
148 
149 static void put_device_state_wait(struct device_state *dev_state)
150 {
151  DEFINE_WAIT(wait);
152 
153  prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE);
154  if (!atomic_dec_and_test(&dev_state->count))
155  schedule();
156  finish_wait(&dev_state->wq, &wait);
157 
158  free_device_state(dev_state);
159 }
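/*
 * Editorial note (minimal sketch, not part of the original file): both
 * device_state and pasid_state are torn down with the same pattern - an
 * atomic reference count paired with a wait queue.  put_*() drops one
 * reference and wakes the queue when the count reaches zero, while
 * put_*_wait() drops the caller's reference and sleeps until all other
 * holders are gone before the object is freed.  The pattern with
 * hypothetical names:
 */
struct example_object {
	atomic_t count;
	wait_queue_head_t wq;
};

static void example_put(struct example_object *obj)
{
	if (atomic_dec_and_test(&obj->count))
		wake_up(&obj->wq);
}

static void example_put_and_wait(struct example_object *obj)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&obj->wq, &wait, TASK_UNINTERRUPTIBLE);
	if (!atomic_dec_and_test(&obj->count))
		schedule();
	finish_wait(&obj->wq, &wait);

	kfree(obj);	/* no references remain at this point */
}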
160 
161 static struct notifier_block profile_nb = {
162  .notifier_call = task_exit,
163 };
164 
165 static void link_pasid_state(struct pasid_state *pasid_state)
166 {
167  spin_lock(&ps_lock);
168  list_add_tail(&pasid_state->list, &pasid_state_list);
169  spin_unlock(&ps_lock);
170 }
171 
172 static void __unlink_pasid_state(struct pasid_state *pasid_state)
173 {
174  list_del(&pasid_state->list);
175 }
176 
177 static void unlink_pasid_state(struct pasid_state *pasid_state)
178 {
179  spin_lock(&ps_lock);
180  __unlink_pasid_state(pasid_state);
181  spin_unlock(&ps_lock);
182 }
183 
184 /* Must be called under dev_state->lock */
185 static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
186  int pasid, bool alloc)
187 {
188  struct pasid_state **root, **ptr;
189  int level, index;
190 
191  level = dev_state->pasid_levels;
192  root = dev_state->states;
193 
194  while (true) {
195 
196  index = (pasid >> (9 * level)) & 0x1ff;
197  ptr = &root[index];
198 
199  if (level == 0)
200  break;
201 
202  if (*ptr == NULL) {
203  if (!alloc)
204  return NULL;
205 
206  *ptr = (void *)get_zeroed_page(GFP_ATOMIC);
207  if (*ptr == NULL)
208  return NULL;
209  }
210 
211  root = (struct pasid_state **)*ptr;
212  level -= 1;
213  }
214 
215  return ptr;
216 }
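/*
 * Editorial note (illustrative sketch, not part of the original file):
 * dev_state->states is the root of a radix tree built from 512-entry
 * tables, so each level consumes 9 bits of the PASID, most-significant
 * bits first.  With pasid_levels == 1 and pasid == 0x2a7, the walk above
 * uses index (0x2a7 >> 9) & 0x1ff == 1 in the level-1 table and
 * 0x2a7 & 0x1ff == 0xa7 in the level-0 table.  The index computation in
 * isolation:
 */
static inline int example_pasid_table_index(int pasid, int level)
{
	/* 9 PASID bits select one of the 512 slots at this level */
	return (pasid >> (9 * level)) & 0x1ff;
}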
217 
218 static int set_pasid_state(struct device_state *dev_state,
219  struct pasid_state *pasid_state,
220  int pasid)
221 {
222  struct pasid_state **ptr;
223  unsigned long flags;
224  int ret;
225 
226  spin_lock_irqsave(&dev_state->lock, flags);
227  ptr = __get_pasid_state_ptr(dev_state, pasid, true);
228 
229  ret = -ENOMEM;
230  if (ptr == NULL)
231  goto out_unlock;
232 
233  ret = -ENOMEM;
234  if (*ptr != NULL)
235  goto out_unlock;
236 
237  *ptr = pasid_state;
238 
239  ret = 0;
240 
241 out_unlock:
242  spin_unlock_irqrestore(&dev_state->lock, flags);
243 
244  return ret;
245 }
246 
247 static void clear_pasid_state(struct device_state *dev_state, int pasid)
248 {
249  struct pasid_state **ptr;
250  unsigned long flags;
251 
252  spin_lock_irqsave(&dev_state->lock, flags);
253  ptr = __get_pasid_state_ptr(dev_state, pasid, true);
254 
255  if (ptr == NULL)
256  goto out_unlock;
257 
258  *ptr = NULL;
259 
260 out_unlock:
261  spin_unlock_irqrestore(&dev_state->lock, flags);
262 }
263 
264 static struct pasid_state *get_pasid_state(struct device_state *dev_state,
265  int pasid)
266 {
267  struct pasid_state **ptr, *ret = NULL;
268  unsigned long flags;
269 
270  spin_lock_irqsave(&dev_state->lock, flags);
271  ptr = __get_pasid_state_ptr(dev_state, pasid, false);
272 
273  if (ptr == NULL)
274  goto out_unlock;
275 
276  ret = *ptr;
277  if (ret)
278  atomic_inc(&ret->count);
279 
280 out_unlock:
281  spin_unlock_irqrestore(&dev_state->lock, flags);
282 
283  return ret;
284 }
285 
286 static void free_pasid_state(struct pasid_state *pasid_state)
287 {
288  kfree(pasid_state);
289 }
290 
291 static void put_pasid_state(struct pasid_state *pasid_state)
292 {
293  if (atomic_dec_and_test(&pasid_state->count)) {
294  put_device_state(pasid_state->device_state);
295  wake_up(&pasid_state->wq);
296  }
297 }
298 
299 static void put_pasid_state_wait(struct pasid_state *pasid_state)
300 {
301  DEFINE_WAIT(wait);
302 
303  prepare_to_wait(&pasid_state->wq, &wait, TASK_UNINTERRUPTIBLE);
304 
305  if (atomic_dec_and_test(&pasid_state->count))
306  put_device_state(pasid_state->device_state);
307  else
308  schedule();
309 
310  finish_wait(&pasid_state->wq, &wait);
311  mmput(pasid_state->mm);
312  free_pasid_state(pasid_state);
313 }
314 
315 static void __unbind_pasid(struct pasid_state *pasid_state)
316 {
317  struct iommu_domain *domain;
318 
319  domain = pasid_state->device_state->domain;
320 
321  amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid);
322  clear_pasid_state(pasid_state->device_state, pasid_state->pasid);
323 
324  /* Make sure no more pending faults are in the queue */
325  flush_workqueue(iommu_wq);
326 
327  mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
328 
329  put_pasid_state(pasid_state); /* Reference taken in bind() function */
330 }
331 
332 static void unbind_pasid(struct device_state *dev_state, int pasid)
333 {
334  struct pasid_state *pasid_state;
335 
336  pasid_state = get_pasid_state(dev_state, pasid);
337  if (pasid_state == NULL)
338  return;
339 
340  unlink_pasid_state(pasid_state);
341  __unbind_pasid(pasid_state);
342  put_pasid_state_wait(pasid_state); /* Reference taken in this function */
343 }
344 
345 static void free_pasid_states_level1(struct pasid_state **tbl)
346 {
347  int i;
348 
349  for (i = 0; i < 512; ++i) {
350  if (tbl[i] == NULL)
351  continue;
352 
353  free_page((unsigned long)tbl[i]);
354  }
355 }
356 
357 static void free_pasid_states_level2(struct pasid_state **tbl)
358 {
359  struct pasid_state **ptr;
360  int i;
361 
362  for (i = 0; i < 512; ++i) {
363  if (tbl[i] == NULL)
364  continue;
365 
366  ptr = (struct pasid_state **)tbl[i];
367  free_pasid_states_level1(ptr);
368  }
369 }
370 
371 static void free_pasid_states(struct device_state *dev_state)
372 {
373  struct pasid_state *pasid_state;
374  int i;
375 
376  for (i = 0; i < dev_state->max_pasids; ++i) {
377  pasid_state = get_pasid_state(dev_state, i);
378  if (pasid_state == NULL)
379  continue;
380 
381  put_pasid_state(pasid_state);
382  unbind_pasid(dev_state, i);
383  }
384 
385  if (dev_state->pasid_levels == 2)
386  free_pasid_states_level2(dev_state->states);
387  else if (dev_state->pasid_levels == 1)
388  free_pasid_states_level1(dev_state->states);
389  else if (dev_state->pasid_levels != 0)
390  BUG();
391 
392  free_page((unsigned long)dev_state->states);
393 }
394 
395 static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
396 {
397  return container_of(mn, struct pasid_state, mn);
398 }
399 
400 static void __mn_flush_page(struct mmu_notifier *mn,
401  unsigned long address)
402 {
403  struct pasid_state *pasid_state;
404  struct device_state *dev_state;
405 
406  pasid_state = mn_to_state(mn);
407  dev_state = pasid_state->device_state;
408 
409  amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address);
410 }
411 
412 static int mn_clear_flush_young(struct mmu_notifier *mn,
413  struct mm_struct *mm,
414  unsigned long address)
415 {
416  __mn_flush_page(mn, address);
417 
418  return 0;
419 }
420 
421 static void mn_change_pte(struct mmu_notifier *mn,
422  struct mm_struct *mm,
423  unsigned long address,
424  pte_t pte)
425 {
426  __mn_flush_page(mn, address);
427 }
428 
429 static void mn_invalidate_page(struct mmu_notifier *mn,
430  struct mm_struct *mm,
431  unsigned long address)
432 {
433  __mn_flush_page(mn, address);
434 }
435 
436 static void mn_invalidate_range_start(struct mmu_notifier *mn,
437  struct mm_struct *mm,
438  unsigned long start, unsigned long end)
439 {
440  struct pasid_state *pasid_state;
441  struct device_state *dev_state;
442 
443  pasid_state = mn_to_state(mn);
444  dev_state = pasid_state->device_state;
445 
446  amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
447  __pa(empty_page_table));
448 }
449 
450 static void mn_invalidate_range_end(struct mmu_notifier *mn,
451  struct mm_struct *mm,
452  unsigned long start, unsigned long end)
453 {
454  struct pasid_state *pasid_state;
455  struct device_state *dev_state;
456 
457  pasid_state = mn_to_state(mn);
458  dev_state = pasid_state->device_state;
459 
460  amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
461  __pa(pasid_state->mm->pgd));
462 }
463 
464 static struct mmu_notifier_ops iommu_mn = {
465  .clear_flush_young = mn_clear_flush_young,
466  .change_pte = mn_change_pte,
467  .invalidate_page = mn_invalidate_page,
468  .invalidate_range_start = mn_invalidate_range_start,
469  .invalidate_range_end = mn_invalidate_range_end,
470 };
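/*
 * Editorial note (illustrative sketch, not part of the original file):
 * the single-page callbacks above flush one IOTLB entry, while the range
 * callbacks temporarily point the PASID's GCR3 page-table root at the
 * pre-allocated empty_page_table, so device accesses fault instead of
 * using translations that are being torn down; invalidate_range_end then
 * restores the real pgd.  The shape of that window, with hypothetical
 * parameters:
 */
static void example_range_invalidation_window(struct iommu_domain *domain,
					      int pasid, u64 *empty_pt,
					      pgd_t *pgd)
{
	/* start of range invalidation: block DMA translation */
	amd_iommu_domain_set_gcr3(domain, pasid, __pa(empty_pt));

	/* ... the CPU page tables are modified here ... */

	/* end of range invalidation: restore the real page-table root */
	amd_iommu_domain_set_gcr3(domain, pasid, __pa(pgd));
}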
471 
472 static void set_pri_tag_status(struct pasid_state *pasid_state,
473  u16 tag, int status)
474 {
475  unsigned long flags;
476 
477  spin_lock_irqsave(&pasid_state->lock, flags);
478  pasid_state->pri[tag].status = status;
479  spin_unlock_irqrestore(&pasid_state->lock, flags);
480 }
481 
482 static void finish_pri_tag(struct device_state *dev_state,
483  struct pasid_state *pasid_state,
484  u16 tag)
485 {
486  unsigned long flags;
487 
488  spin_lock_irqsave(&pasid_state->lock, flags);
489  if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) &&
490  pasid_state->pri[tag].finish) {
491  amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid,
492  pasid_state->pri[tag].status, tag);
493  pasid_state->pri[tag].finish = false;
494  pasid_state->pri[tag].status = PPR_SUCCESS;
495  }
496  spin_unlock_irqrestore(&pasid_state->lock, flags);
497 }
498 
499 static void do_fault(struct work_struct *work)
500 {
501  struct fault *fault = container_of(work, struct fault, work);
502  int npages, write;
503  struct page *page;
504 
505  write = !!(fault->flags & PPR_FAULT_WRITE);
506 
507  npages = get_user_pages(fault->state->task, fault->state->mm,
508  fault->address, 1, write, 0, &page, NULL);
509 
510  if (npages == 1) {
511  put_page(page);
512  } else if (fault->dev_state->inv_ppr_cb) {
513  int status;
514 
515  status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev,
516  fault->pasid,
517  fault->address,
518  fault->flags);
519  switch (status) {
520  case AMD_IOMMU_INV_PRI_RSP_SUCCESS:
521  set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS);
522  break;
523  case AMD_IOMMU_INV_PRI_RSP_INVALID:
524  set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
525  break;
526  case AMD_IOMMU_INV_PRI_RSP_FAIL:
527  set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE);
528  break;
529  default:
530  BUG();
531  }
532  } else {
533  set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
534  }
535 
536  finish_pri_tag(fault->dev_state, fault->state, fault->tag);
537 
538  put_pasid_state(fault->state);
539 
540  kfree(fault);
541 }
542 
543 static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
544 {
545  struct amd_iommu_fault *iommu_fault;
546  struct pasid_state *pasid_state;
547  struct device_state *dev_state;
548  unsigned long flags;
549  struct fault *fault;
550  bool finish;
551  u16 tag;
552  int ret;
553 
554  iommu_fault = data;
555  tag = iommu_fault->tag & 0x1ff;
556  finish = (iommu_fault->tag >> 9) & 1;
557 
558  ret = NOTIFY_DONE;
559  dev_state = get_device_state(iommu_fault->device_id);
560  if (dev_state == NULL)
561  goto out;
562 
563  pasid_state = get_pasid_state(dev_state, iommu_fault->pasid);
564  if (pasid_state == NULL) {
565  /* We know the device but not the PASID -> send INVALID */
566  amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid,
567  PPR_INVALID, tag);
568  goto out_drop_state;
569  }
570 
571  spin_lock_irqsave(&pasid_state->lock, flags);
572  atomic_inc(&pasid_state->pri[tag].inflight);
573  if (finish)
574  pasid_state->pri[tag].finish = true;
575  spin_unlock_irqrestore(&pasid_state->lock, flags);
576 
577  fault = kzalloc(sizeof(*fault), GFP_ATOMIC);
578  if (fault == NULL) {
579  /* We are OOM - send success and let the device re-fault */
580  finish_pri_tag(dev_state, pasid_state, tag);
581  goto out_drop_state;
582  }
583 
584  fault->dev_state = dev_state;
585  fault->address = iommu_fault->address;
586  fault->state = pasid_state;
587  fault->tag = tag;
588  fault->finish = finish;
589  fault->flags = iommu_fault->flags;
590  INIT_WORK(&fault->work, do_fault);
591 
592  queue_work(iommu_wq, &fault->work);
593 
594  ret = NOTIFY_OK;
595 
596 out_drop_state:
597  put_device_state(dev_state);
598 
599 out:
600  return ret;
601 }
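/*
 * Editorial note (illustrative sketch, not part of the original file):
 * as the masking above shows, the low 9 bits of the PPR tag identify the
 * request, while bit 9 is the "finish" flag telling the driver that a
 * completion (amd_iommu_complete_ppr) is expected once all faults queued
 * under this tag have been handled.  Decoding in isolation:
 */
static inline void example_decode_ppr_tag(u16 raw_tag, u16 *tag, bool *finish)
{
	*tag    = raw_tag & 0x1ff;	/* request identifier       */
	*finish = (raw_tag >> 9) & 1;	/* response expected if set */
}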
602 
603 static struct notifier_block ppr_nb = {
604  .notifier_call = ppr_notifier,
605 };
606 
607 static int task_exit(struct notifier_block *nb, unsigned long e, void *data)
608 {
609  struct pasid_state *pasid_state;
610  struct task_struct *task;
611 
612  task = data;
613 
614  /*
615  * Using this notifier is a hack - but there is no other choice
616  * at the moment. What I really want is a sleeping notifier that
617  * is called when an MM goes down. But such a notifier doesn't
618  * exist yet. The notifier needs to sleep because it has to make
619  * sure that the device does not use the PASID and the address
620  * space anymore before it is destroyed. This includes waiting
621  * for pending PRI requests to pass the workqueue. The
622  * MMU-Notifiers would be a good fit, but they use RCU and so
623  they are not allowed to sleep. Let's see how we can solve this
624  * in a more intelligent way in the future.
625  */
626 again:
627  spin_lock(&ps_lock);
628  list_for_each_entry(pasid_state, &pasid_state_list, list) {
629  struct device_state *dev_state;
630  int pasid;
631 
632  if (pasid_state->task != task)
633  continue;
634 
635  /* Drop Lock and unbind */
636  spin_unlock(&ps_lock);
637 
638  dev_state = pasid_state->device_state;
639  pasid = pasid_state->pasid;
640 
641  if (pasid_state->device_state->inv_ctx_cb)
642  dev_state->inv_ctx_cb(dev_state->pdev, pasid);
643 
644  unbind_pasid(dev_state, pasid);
645 
646  /* Task may be in the list multiple times */
647  goto again;
648  }
649  spin_unlock(&ps_lock);
650 
651  return NOTIFY_OK;
652 }
653 
654 int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
655  struct task_struct *task)
656 {
657  struct pasid_state *pasid_state;
658  struct device_state *dev_state;
659  u16 devid;
660  int ret;
661 
662  might_sleep();
663 
664  if (!amd_iommu_v2_supported())
665  return -ENODEV;
666 
667  devid = device_id(pdev);
668  dev_state = get_device_state(devid);
669 
670  if (dev_state == NULL)
671  return -EINVAL;
672 
673  ret = -EINVAL;
674  if (pasid < 0 || pasid >= dev_state->max_pasids)
675  goto out;
676 
677  ret = -ENOMEM;
678  pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL);
679  if (pasid_state == NULL)
680  goto out;
681 
682  atomic_set(&pasid_state->count, 1);
683  init_waitqueue_head(&pasid_state->wq);
684  spin_lock_init(&pasid_state->lock);
685 
686  pasid_state->task = task;
687  pasid_state->mm = get_task_mm(task);
688  pasid_state->device_state = dev_state;
689  pasid_state->pasid = pasid;
690  pasid_state->mn.ops = &iommu_mn;
691 
692  if (pasid_state->mm == NULL)
693  goto out_free;
694 
695  mmu_notifier_register(&pasid_state->mn, pasid_state->mm);
696 
697  ret = set_pasid_state(dev_state, pasid_state, pasid);
698  if (ret)
699  goto out_unregister;
700 
701  ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
702  __pa(pasid_state->mm->pgd));
703  if (ret)
704  goto out_clear_state;
705 
706  link_pasid_state(pasid_state);
707 
708  return 0;
709 
710 out_clear_state:
711  clear_pasid_state(dev_state, pasid);
712 
713 out_unregister:
714  mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
715 
716 out_free:
717  free_pasid_state(pasid_state);
718 
719 out:
720  put_device_state(dev_state);
721 
722  return ret;
723 }
724 EXPORT_SYMBOL(amd_iommu_bind_pasid);
725 
726 void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
727 {
728  struct device_state *dev_state;
729  u16 devid;
730 
731  might_sleep();
732 
733  if (!amd_iommu_v2_supported())
734  return;
735 
736  devid = device_id(pdev);
737  dev_state = get_device_state(devid);
738  if (dev_state == NULL)
739  return;
740 
741  if (pasid < 0 || pasid >= dev_state->max_pasids)
742  goto out;
743 
744  unbind_pasid(dev_state, pasid);
745 
746 out:
747  put_device_state(dev_state);
748 }
749 EXPORT_SYMBOL(amd_iommu_unbind_pasid);
750 
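/*
 * Editorial note (hypothetical usage sketch, not part of the original
 * file): a device driver that wants to share a task's address space with
 * its device would combine the entry points exported by this file roughly
 * as follows (error handling and device-specific setup omitted;
 * amd_iommu_init_device() and amd_iommu_free_device() are defined below
 * and declared in <linux/amd-iommu.h>).
 */
static int example_enable_demand_paging(struct pci_dev *pdev,
					struct task_struct *task, int pasid)
{
	int ret;

	/* allocate the IOMMUv2 domain and per-device PASID state */
	ret = amd_iommu_init_device(pdev, 16 /* max. PASIDs, driver choice */);
	if (ret)
		return ret;

	/* attach the task's mm to the given PASID on this device */
	ret = amd_iommu_bind_pasid(pdev, pasid, task);
	if (ret)
		amd_iommu_free_device(pdev);

	return ret;
}

static void example_disable_demand_paging(struct pci_dev *pdev, int pasid)
{
	amd_iommu_unbind_pasid(pdev, pasid);
	amd_iommu_free_device(pdev);
}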
751 int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
752 {
753  struct device_state *dev_state;
754  unsigned long flags;
755  int ret, tmp;
756  u16 devid;
757 
758  might_sleep();
759 
760  if (!amd_iommu_v2_supported())
761  return -ENODEV;
762 
763  if (pasids <= 0 || pasids > (PASID_MASK + 1))
764  return -EINVAL;
765 
766  devid = device_id(pdev);
767 
768  dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL);
769  if (dev_state == NULL)
770  return -ENOMEM;
771 
772  spin_lock_init(&dev_state->lock);
773  init_waitqueue_head(&dev_state->wq);
774  dev_state->pdev = pdev;
775 
776  tmp = pasids;
777  for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9)
778  dev_state->pasid_levels += 1;
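	/*
	 * Editorial note: the loop above counts how many extra 512-entry
	 * table levels are needed on top of level 0 to cover 'pasids'
	 * entries, matching the 9-bits-per-level walk in
	 * __get_pasid_state_ptr().  For example, pasids = 512 fits in
	 * level 0 alone (pasid_levels = 0), pasids = 65536 needs one
	 * extra level (pasid_levels = 1), and the maximum of 2^20 PASIDs
	 * needs two (pasid_levels = 2).
	 */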
779 
780  atomic_set(&dev_state->count, 1);
781  dev_state->max_pasids = pasids;
782 
783  ret = -ENOMEM;
784  dev_state->states = (void *)get_zeroed_page(GFP_KERNEL);
785  if (dev_state->states == NULL)
786  goto out_free_dev_state;
787 
788  dev_state->domain = iommu_domain_alloc(&pci_bus_type);
789  if (dev_state->domain == NULL)
790  goto out_free_states;
791 
792  amd_iommu_domain_direct_map(dev_state->domain);
793 
794  ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids);
795  if (ret)
796  goto out_free_domain;
797 
798  ret = iommu_attach_device(dev_state->domain, &pdev->dev);
799  if (ret != 0)
800  goto out_free_domain;
801 
802  spin_lock_irqsave(&state_lock, flags);
803 
804  if (state_table[devid] != NULL) {
805  spin_unlock_irqrestore(&state_lock, flags);
806  ret = -EBUSY;
807  goto out_free_domain;
808  }
809 
810  state_table[devid] = dev_state;
811 
812  spin_unlock_irqrestore(&state_lock, flags);
813 
814  return 0;
815 
816 out_free_domain:
817  iommu_domain_free(dev_state->domain);
818 
819 out_free_states:
820  free_page((unsigned long)dev_state->states);
821 
822 out_free_dev_state:
823  kfree(dev_state);
824 
825  return ret;
826 }
827 EXPORT_SYMBOL(amd_iommu_init_device);
828 
829 void amd_iommu_free_device(struct pci_dev *pdev)
830 {
831  struct device_state *dev_state;
832  unsigned long flags;
833  u16 devid;
834 
835  if (!amd_iommu_v2_supported())
836  return;
837 
838  devid = device_id(pdev);
839 
840  spin_lock_irqsave(&state_lock, flags);
841 
842  dev_state = state_table[devid];
843  if (dev_state == NULL) {
844  spin_unlock_irqrestore(&state_lock, flags);
845  return;
846  }
847 
848  state_table[devid] = NULL;
849 
850  spin_unlock_irqrestore(&state_lock, flags);
851 
852  /* Get rid of any remaining pasid states */
853  free_pasid_states(dev_state);
854 
855  put_device_state_wait(dev_state);
856 }
857 EXPORT_SYMBOL(amd_iommu_free_device);
858 
859 int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
860  amd_iommu_invalid_ppr_cb cb)
861 {
862  struct device_state *dev_state;
863  unsigned long flags;
864  u16 devid;
865  int ret;
866 
867  if (!amd_iommu_v2_supported())
868  return -ENODEV;
869 
870  devid = device_id(pdev);
871 
872  spin_lock_irqsave(&state_lock, flags);
873 
874  ret = -EINVAL;
875  dev_state = state_table[devid];
876  if (dev_state == NULL)
877  goto out_unlock;
878 
879  dev_state->inv_ppr_cb = cb;
880 
881  ret = 0;
882 
883 out_unlock:
884  spin_unlock_irqrestore(&state_lock, flags);
885 
886  return ret;
887 }
888 EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);
889 
890 int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
891  amd_iommu_invalidate_ctx cb)
892 {
893  struct device_state *dev_state;
894  unsigned long flags;
895  u16 devid;
896  int ret;
897 
898  if (!amd_iommu_v2_supported())
899  return -ENODEV;
900 
901  devid = device_id(pdev);
902 
903  spin_lock_irqsave(&state_lock, flags);
904 
905  ret = -EINVAL;
906  dev_state = state_table[devid];
907  if (dev_state == NULL)
908  goto out_unlock;
909 
910  dev_state->inv_ctx_cb = cb;
911 
912  ret = 0;
913 
914 out_unlock:
915  spin_unlock_irqrestore(&state_lock, flags);
916 
917  return ret;
918 }
919 EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb);
920 
921 static int __init amd_iommu_v2_init(void)
922 {
923  size_t state_table_size;
924  int ret;
925 
926  pr_info("AMD IOMMUv2 driver by Joerg Roedel <[email protected]>\n");
927 
928  if (!amd_iommu_v2_supported()) {
929  pr_info("AMD IOMMUv2 functionality not available on this system\n");
930  /*
931  * Load anyway to provide the symbols to other modules
932  * which may use AMD IOMMUv2 optionally.
933  */
934  return 0;
935  }
936 
937  spin_lock_init(&state_lock);
938 
939  state_table_size = MAX_DEVICES * sizeof(struct device_state *);
940  state_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
941  get_order(state_table_size));
942  if (state_table == NULL)
943  return -ENOMEM;
944 
945  ret = -ENOMEM;
946  iommu_wq = create_workqueue("amd_iommu_v2");
947  if (iommu_wq == NULL)
948  goto out_free;
949 
950  ret = -ENOMEM;
951  empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL);
952  if (empty_page_table == NULL)
953  goto out_destroy_wq;
954 
955  amd_iommu_register_ppr_notifier(&ppr_nb);
956  profile_event_register(PROFILE_TASK_EXIT, &profile_nb);
957 
958  return 0;
959 
960 out_destroy_wq:
961  destroy_workqueue(iommu_wq);
962 
963 out_free:
964  free_pages((unsigned long)state_table, get_order(state_table_size));
965 
966  return ret;
967 }
968 
969 static void __exit amd_iommu_v2_exit(void)
970 {
971  struct device_state *dev_state;
972  size_t state_table_size;
973  int i;
974 
975  if (!amd_iommu_v2_supported())
976  return;
977 
978  profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb);
979  amd_iommu_unregister_ppr_notifier(&ppr_nb);
980 
981  flush_workqueue(iommu_wq);
982 
983  /*
984  * The loop below might call flush_workqueue(), so call
985  * destroy_workqueue() after it
986  */
987  for (i = 0; i < MAX_DEVICES; ++i) {
988  dev_state = get_device_state(i);
989 
990  if (dev_state == NULL)
991  continue;
992 
993  WARN_ON_ONCE(1);
994 
995  put_device_state(dev_state);
996  amd_iommu_free_device(dev_state->pdev);
997  }
998 
999  destroy_workqueue(iommu_wq);
1000 
1001  state_table_size = MAX_DEVICES * sizeof(struct device_state *);
1002  free_pages((unsigned long)state_table, get_order(state_table_size));
1003 
1004  free_page((unsigned long)empty_page_table);
1005 }
1006 
1007 module_init(amd_iommu_v2_init);
1008 module_exit(amd_iommu_v2_exit);