Linux Kernel  3.7.1
dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <[email protected]>
19  * Author: Shaohua Li <[email protected]>
20  * Author: Anil S Keshavamurthy <[email protected]>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28 
29 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */
30 
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/iova.h>
34 #include <linux/intel-iommu.h>
35 #include <linux/timer.h>
36 #include <linux/irq.h>
37 #include <linux/interrupt.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/slab.h>
41 #include <asm/irq_remapping.h>
42 #include <asm/iommu_table.h>
43 
44 /* No locks are needed as DMA remapping hardware unit
45  * list is constructed at boot time and hotplug of
46  * these units are not supported by the architecture.
47  */
48 LIST_HEAD(dmar_drhd_units);
49 
50 struct acpi_table_header * __initdata dmar_tbl;
51 static acpi_size dmar_tbl_size;
52 
53 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
54 {
55  /*
56  * add INCLUDE_ALL at the tail, so scan the list will find it at
57  * the very end.
58  */
59  if (drhd->include_all)
60  list_add_tail(&drhd->list, &dmar_drhd_units);
61  else
62  list_add(&drhd->list, &dmar_drhd_units);
63 }
64 
65 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
66  struct pci_dev **dev, u16 segment)
67 {
68  struct pci_bus *bus;
69  struct pci_dev *pdev = NULL;
70  struct acpi_dmar_pci_path *path;
71  int count;
72 
73  bus = pci_find_bus(segment, scope->bus);
74  path = (struct acpi_dmar_pci_path *)(scope + 1);
75  count = (scope->length - sizeof(struct acpi_dmar_device_scope))
76  / sizeof(struct acpi_dmar_pci_path);
77 
78  while (count) {
79  if (pdev)
80  pci_dev_put(pdev);
81  /*
82  * Some BIOSes list non-exist devices in DMAR table, just
83  * ignore it
84  */
85  if (!bus) {
86  pr_warn("Device scope bus [%d] not found\n", scope->bus);
87  break;
88  }
89  pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
90  if (!pdev) {
91  /* warning will be printed below */
92  break;
93  }
94  path ++;
95  count --;
96  bus = pdev->subordinate;
97  }
98  if (!pdev) {
99  pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n",
100  segment, scope->bus, path->dev, path->fn);
101  *dev = NULL;
102  return 0;
103  }
104  if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
105  pdev->subordinate) || (scope->entry_type == \
106  ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
107  pci_dev_put(pdev);
108  pr_warn("Device scope type does not match for %s\n",
109  pci_name(pdev));
110  return -EINVAL;
111  }
112  *dev = pdev;
113  return 0;
114 }
115 
116 int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
117  struct pci_dev ***devices, u16 segment)
118 {
119  struct acpi_dmar_device_scope *scope;
120  void * tmp = start;
121  int index;
122  int ret;
123 
124  *cnt = 0;
125  while (start < end) {
126  scope = start;
127  if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
128  scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
129  (*cnt)++;
130  else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
131  pr_warn("Unsupported device scope\n");
132  }
133  start += scope->length;
134  }
135  if (*cnt == 0)
136  return 0;
137 
138  *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
139  if (!*devices)
140  return -ENOMEM;
141 
142  start = tmp;
143  index = 0;
144  while (start < end) {
145  scope = start;
146  if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
147  scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
148  ret = dmar_parse_one_dev_scope(scope,
149  &(*devices)[index], segment);
150  if (ret) {
151  kfree(*devices);
152  return ret;
153  }
154  index ++;
155  }
156  start += scope->length;
157  }
158 
159  return 0;
160 }
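/*
 * Editor's note -- illustrative sketch, not part of dmar.c: a DMAR device
 * scope entry is a struct acpi_dmar_device_scope header followed by a
 * variable number of struct acpi_dmar_pci_path hops, one per bus level:
 *
 *	[acpi_dmar_device_scope][path 0][path 1]...[path N-1]
 *
 * which is why dmar_parse_one_dev_scope() above derives the hop count as
 *
 *	count = (scope->length - sizeof(struct acpi_dmar_device_scope))
 *		/ sizeof(struct acpi_dmar_pci_path);
 *
 * and follows pdev->subordinate one bus level deeper for each remaining hop.
 */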
161 
162 /**
163  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
164  * structure which uniquely represent one DMA remapping hardware unit
165  * present in the platform
166  */
167 static int __init
168 dmar_parse_one_drhd(struct acpi_dmar_header *header)
169 {
170  struct acpi_dmar_hardware_unit *drhd;
171  struct dmar_drhd_unit *dmaru;
172  int ret = 0;
173 
174  drhd = (struct acpi_dmar_hardware_unit *)header;
175  dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
176  if (!dmaru)
177  return -ENOMEM;
178 
179  dmaru->hdr = header;
180  dmaru->reg_base_addr = drhd->address;
181  dmaru->segment = drhd->segment;
182  dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
183 
184  ret = alloc_iommu(dmaru);
185  if (ret) {
186  kfree(dmaru);
187  return ret;
188  }
189  dmar_register_drhd_unit(dmaru);
190  return 0;
191 }
192 
193 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
194 {
195  struct acpi_dmar_hardware_unit *drhd;
196  int ret = 0;
197 
198  drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
199 
200  if (dmaru->include_all)
201  return 0;
202 
203  ret = dmar_parse_dev_scope((void *)(drhd + 1),
204  ((void *)drhd) + drhd->header.length,
205  &dmaru->devices_cnt, &dmaru->devices,
206  drhd->segment);
207  if (ret) {
208  list_del(&dmaru->list);
209  kfree(dmaru);
210  }
211  return ret;
212 }
213 
214 #ifdef CONFIG_ACPI_NUMA
215 static int __init
216 dmar_parse_one_rhsa(struct acpi_dmar_header *header)
217 {
218  struct acpi_dmar_rhsa *rhsa;
219  struct dmar_drhd_unit *drhd;
220 
221  rhsa = (struct acpi_dmar_rhsa *)header;
222  for_each_drhd_unit(drhd) {
223  if (drhd->reg_base_addr == rhsa->base_address) {
224  int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
225 
226  if (!node_online(node))
227  node = -1;
228  drhd->iommu->node = node;
229  return 0;
230  }
231  }
232  WARN_TAINT(
233  1, TAINT_FIRMWARE_WORKAROUND,
234  "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
235  "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
236  drhd->reg_base_addr,
237  dmi_get_system_info(DMI_BIOS_VENDOR),
238  dmi_get_system_info(DMI_BIOS_VERSION),
239  dmi_get_system_info(DMI_PRODUCT_VERSION));
240 
241  return 0;
242 }
243 #endif
244 
245 static void __init
246 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
247 {
248  struct acpi_dmar_hardware_unit *drhd;
249  struct acpi_dmar_reserved_memory *rmrr;
250  struct acpi_dmar_atsr *atsr;
251  struct acpi_dmar_rhsa *rhsa;
252 
253  switch (header->type) {
254  case ACPI_DMAR_TYPE_HARDWARE_UNIT:
255  drhd = container_of(header, struct acpi_dmar_hardware_unit,
256  header);
257  pr_info("DRHD base: %#016Lx flags: %#x\n",
258  (unsigned long long)drhd->address, drhd->flags);
259  break;
260  case ACPI_DMAR_TYPE_RESERVED_MEMORY:
261  rmrr = container_of(header, struct acpi_dmar_reserved_memory,
262  header);
263  pr_info("RMRR base: %#016Lx end: %#016Lx\n",
264  (unsigned long long)rmrr->base_address,
265  (unsigned long long)rmrr->end_address);
266  break;
267  case ACPI_DMAR_TYPE_ATSR:
268  atsr = container_of(header, struct acpi_dmar_atsr, header);
269  pr_info("ATSR flags: %#x\n", atsr->flags);
270  break;
271  case ACPI_DMAR_TYPE_RHSA:
272  rhsa = container_of(header, struct acpi_dmar_rhsa, header);
273  pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
274  (unsigned long long)rhsa->base_address,
275  rhsa->proximity_domain);
276  break;
277  }
278 }
279 
280 /**
281  * dmar_table_detect - checks to see if the platform supports DMAR devices
282  */
283 static int __init dmar_table_detect(void)
284 {
285  acpi_status status = AE_OK;
286 
287  /* if we could find DMAR table, then there are DMAR devices */
288  status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
289  (struct acpi_table_header **)&dmar_tbl,
290  &dmar_tbl_size);
291 
292  if (ACPI_SUCCESS(status) && !dmar_tbl) {
293  pr_warn("Unable to map DMAR\n");
294  status = AE_NOT_FOUND;
295  }
296 
297  return (ACPI_SUCCESS(status) ? 1 : 0);
298 }
299 
300 /**
301  * parse_dmar_table - parses the DMA reporting table
302  */
303 static int __init
304 parse_dmar_table(void)
305 {
306  struct acpi_table_dmar *dmar;
307  struct acpi_dmar_header *entry_header;
308  int ret = 0;
309 
310  /*
311  * Do it again, earlier dmar_tbl mapping could be mapped with
312  * fixed map.
313  */
314  dmar_table_detect();
315 
316  /*
317  * ACPI tables may not be DMA protected by tboot, so use DMAR copy
318  * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
319  */
320  dmar_tbl = tboot_get_dmar_table(dmar_tbl);
321 
322  dmar = (struct acpi_table_dmar *)dmar_tbl;
323  if (!dmar)
324  return -ENODEV;
325 
326  if (dmar->width < PAGE_SHIFT - 1) {
327  pr_warn("Invalid DMAR haw\n");
328  return -EINVAL;
329  }
330 
331  pr_info("Host address width %d\n", dmar->width + 1);
332 
333  entry_header = (struct acpi_dmar_header *)(dmar + 1);
334  while (((unsigned long)entry_header) <
335  (((unsigned long)dmar) + dmar_tbl->length)) {
336  /* Avoid looping forever on bad ACPI tables */
337  if (entry_header->length == 0) {
338  pr_warn("Invalid 0-length structure\n");
339  ret = -EINVAL;
340  break;
341  }
342 
343  dmar_table_print_dmar_entry(entry_header);
344 
345  switch (entry_header->type) {
346  case ACPI_DMAR_TYPE_HARDWARE_UNIT:
347  ret = dmar_parse_one_drhd(entry_header);
348  break;
349  case ACPI_DMAR_TYPE_RESERVED_MEMORY:
350  ret = dmar_parse_one_rmrr(entry_header);
351  break;
352  case ACPI_DMAR_TYPE_ATSR:
353  ret = dmar_parse_one_atsr(entry_header);
354  break;
355  case ACPI_DMAR_TYPE_RHSA:
356 #ifdef CONFIG_ACPI_NUMA
357  ret = dmar_parse_one_rhsa(entry_header);
358 #endif
359  break;
360  default:
361  pr_warn("Unknown DMAR structure type %d\n",
362  entry_header->type);
363  ret = 0; /* for forward compatibility */
364  break;
365  }
366  if (ret)
367  break;
368 
369  entry_header = ((void *)entry_header + entry_header->length);
370  }
371  return ret;
372 }
373 
374 static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
375  struct pci_dev *dev)
376 {
377  int index;
378 
379  while (dev) {
380  for (index = 0; index < cnt; index++)
381  if (dev == devices[index])
382  return 1;
383 
384  /* Check our parent */
385  dev = dev->bus->self;
386  }
387 
388  return 0;
389 }
390 
391 struct dmar_drhd_unit *
392 dmar_find_matched_drhd_unit(struct pci_dev *dev)
393 {
394  struct dmar_drhd_unit *dmaru = NULL;
395  struct acpi_dmar_hardware_unit *drhd;
396 
397  dev = pci_physfn(dev);
398 
399  list_for_each_entry(dmaru, &dmar_drhd_units, list) {
400  drhd = container_of(dmaru->hdr,
401  struct acpi_dmar_hardware_unit,
402  header);
403 
404  if (dmaru->include_all &&
405  drhd->segment == pci_domain_nr(dev->bus))
406  return dmaru;
407 
408  if (dmar_pci_device_match(dmaru->devices,
409  dmaru->devices_cnt, dev))
410  return dmaru;
411  }
412 
413  return NULL;
414 }
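/*
 * Editor's note -- illustrative usage sketch, not part of dmar.c; the helper
 * name below is hypothetical.  Because INCLUDE_ALL units are kept at the tail
 * of dmar_drhd_units (see dmar_register_drhd_unit()), an explicit device-scope
 * match is always found before the catch-all unit for the same segment.
 */
static inline struct intel_iommu *example_iommu_for_dev(struct pci_dev *pdev)
{
	struct dmar_drhd_unit *drhd = dmar_find_matched_drhd_unit(pdev);

	return drhd ? drhd->iommu : NULL;
}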
415 
416 int __init dmar_dev_scope_init(void)
417 {
418  static int dmar_dev_scope_initialized;
419  struct dmar_drhd_unit *drhd, *drhd_n;
420  int ret = -ENODEV;
421 
422  if (dmar_dev_scope_initialized)
423  return dmar_dev_scope_initialized;
424 
425  if (list_empty(&dmar_drhd_units))
426  goto fail;
427 
428  list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
429  ret = dmar_parse_dev(drhd);
430  if (ret)
431  goto fail;
432  }
433 
434  ret = dmar_parse_rmrr_atsr_dev();
435  if (ret)
436  goto fail;
437 
438  dmar_dev_scope_initialized = 1;
439  return 0;
440 
441 fail:
442  dmar_dev_scope_initialized = ret;
443  return ret;
444 }
445 
445 
446 
447 int __init dmar_table_init(void)
448 {
449  static int dmar_table_initialized;
450  int ret;
451 
452  if (dmar_table_initialized)
453  return 0;
454 
455  dmar_table_initialized = 1;
456 
457  ret = parse_dmar_table();
458  if (ret) {
459  if (ret != -ENODEV)
460  pr_info("parse DMAR table failure.\n");
461  return ret;
462  }
463 
464  if (list_empty(&dmar_drhd_units)) {
465  pr_info("No DMAR devices found\n");
466  return -ENODEV;
467  }
468 
469  return 0;
470 }
471 
472 static void warn_invalid_dmar(u64 addr, const char *message)
473 {
476  "Your BIOS is broken; DMAR reported at address %llx%s!\n"
477  "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
478  addr, message,
482 }
483 
484 int __init check_zero_address(void)
485 {
486  struct acpi_table_dmar *dmar;
487  struct acpi_dmar_header *entry_header;
488  struct acpi_dmar_hardware_unit *drhd;
489 
490  dmar = (struct acpi_table_dmar *)dmar_tbl;
491  entry_header = (struct acpi_dmar_header *)(dmar + 1);
492 
493  while (((unsigned long)entry_header) <
494  (((unsigned long)dmar) + dmar_tbl->length)) {
495  /* Avoid looping forever on bad ACPI tables */
496  if (entry_header->length == 0) {
497  pr_warn("Invalid 0-length structure\n");
498  return 0;
499  }
500 
501  if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
502  void __iomem *addr;
503  u64 cap, ecap;
504 
505  drhd = (void *)entry_header;
506  if (!drhd->address) {
507  warn_invalid_dmar(0, "");
508  goto failed;
509  }
510 
511  addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
512  if (!addr ) {
513  printk("IOMMU: can't validate: %llx\n", drhd->address);
514  goto failed;
515  }
516  cap = dmar_readq(addr + DMAR_CAP_REG);
517  ecap = dmar_readq(addr + DMAR_ECAP_REG);
518  early_iounmap(addr, VTD_PAGE_SIZE);
519  if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
520  warn_invalid_dmar(drhd->address,
521  " returns all ones");
522  goto failed;
523  }
524  }
525 
526  entry_header = ((void *)entry_header + entry_header->length);
527  }
528  return 1;
529 
530 failed:
531  return 0;
532 }
533 
534 int __init detect_intel_iommu(void)
535 {
536  int ret;
537 
538  ret = dmar_table_detect();
539  if (ret)
540  ret = check_zero_address();
541  {
542  struct acpi_table_dmar *dmar;
543 
544  dmar = (struct acpi_table_dmar *) dmar_tbl;
545 
546  if (ret && irq_remapping_enabled && cpu_has_x2apic &&
547  dmar->flags & 0x1)
548  pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
549 
550  if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
551  iommu_detected = 1;
552  /* Make sure ACS will be enabled */
553  pci_request_acs();
554  }
555 
556 #ifdef CONFIG_X86
557  if (ret)
558  x86_init.iommu.iommu_init = intel_iommu_init;
559 #endif
560  }
561  early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
562  dmar_tbl = NULL;
563 
564  return ret ? 1 : -ENODEV;
565 }
566 
567 
568 static void unmap_iommu(struct intel_iommu *iommu)
569 {
570  iounmap(iommu->reg);
571  release_mem_region(iommu->reg_phys, iommu->reg_size);
572 }
573 
574 /**
575  * map_iommu: map the iommu's registers
576  * @iommu: the iommu to map
577  * @phys_addr: the physical address of the base register
578  *
579  * Memory map the iommu's registers.  Start w/ a single page, and
580  * possibly expand if that turns out to be insufficient.
581  */
582 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
583 {
584  int map_size, err=0;
585 
586  iommu->reg_phys = phys_addr;
587  iommu->reg_size = VTD_PAGE_SIZE;
588 
589  if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
590  pr_err("IOMMU: can't reserve memory\n");
591  err = -EBUSY;
592  goto out;
593  }
594 
595  iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
596  if (!iommu->reg) {
597  pr_err("IOMMU: can't map the region\n");
598  err = -ENOMEM;
599  goto release;
600  }
601 
602  iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
603  iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
604 
605  if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
606  err = -EINVAL;
607  warn_invalid_dmar(phys_addr, " returns all ones");
608  goto unmap;
609  }
610 
611  /* the registers might be more than one page */
612  map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
613  cap_max_fault_reg_offset(iommu->cap));
614  map_size = VTD_PAGE_ALIGN(map_size);
615  if (map_size > iommu->reg_size) {
616  iounmap(iommu->reg);
617  release_mem_region(iommu->reg_phys, iommu->reg_size);
618  iommu->reg_size = map_size;
619  if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
620  iommu->name)) {
621  pr_err("IOMMU: can't reserve memory\n");
622  err = -EBUSY;
623  goto out;
624  }
625  iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
626  if (!iommu->reg) {
627  pr_err("IOMMU: can't map the region\n");
628  err = -ENOMEM;
629  goto release;
630  }
631  }
632  err = 0;
633  goto out;
634 
635 unmap:
636  iounmap(iommu->reg);
637 release:
638  release_mem_region(iommu->reg_phys, iommu->reg_size);
639 out:
640  return err;
641 }
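/*
 * Editor's note -- worked example with hypothetical register values, not part
 * of dmar.c.  map_iommu() starts with a single VTD_PAGE_SIZE (4KiB) mapping;
 * if the capability registers place the IOTLB or fault-recording registers
 * beyond that page, for instance
 *
 *	ecap_max_iotlb_offset(iommu->ecap)   == 0x1008
 *	cap_max_fault_reg_offset(iommu->cap) == 0x220
 *
 * then map_size = VTD_PAGE_ALIGN(0x1008) = 0x2000 and the region is released,
 * re-reserved and re-mapped at the larger size.
 */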
642 
643 int alloc_iommu(struct dmar_drhd_unit *drhd)
644 {
645  struct intel_iommu *iommu;
646  u32 ver;
647  static int iommu_allocated = 0;
648  int agaw = 0;
649  int msagaw = 0;
650  int err;
651 
652  if (!drhd->reg_base_addr) {
653  warn_invalid_dmar(0, "");
654  return -EINVAL;
655  }
656 
657  iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
658  if (!iommu)
659  return -ENOMEM;
660 
661  iommu->seq_id = iommu_allocated++;
662  sprintf (iommu->name, "dmar%d", iommu->seq_id);
663 
664  err = map_iommu(iommu, drhd->reg_base_addr);
665  if (err) {
666  pr_err("IOMMU: failed to map %s\n", iommu->name);
667  goto error;
668  }
669 
670  err = -EINVAL;
671  agaw = iommu_calculate_agaw(iommu);
672  if (agaw < 0) {
673  pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
674  iommu->seq_id);
675  goto err_unmap;
676  }
677  msagaw = iommu_calculate_max_sagaw(iommu);
678  if (msagaw < 0) {
679  pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
680  iommu->seq_id);
681  goto err_unmap;
682  }
683  iommu->agaw = agaw;
684  iommu->msagaw = msagaw;
685 
686  iommu->node = -1;
687 
688  ver = readl(iommu->reg + DMAR_VER_REG);
689  pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
690  iommu->seq_id,
691  (unsigned long long)drhd->reg_base_addr,
692  DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
693  (unsigned long long)iommu->cap,
694  (unsigned long long)iommu->ecap);
695 
696  raw_spin_lock_init(&iommu->register_lock);
697 
698  drhd->iommu = iommu;
699  return 0;
700 
701  err_unmap:
702  unmap_iommu(iommu);
703  error:
704  kfree(iommu);
705  return err;
706 }
707 
708 void free_iommu(struct intel_iommu *iommu)
709 {
710  if (!iommu)
711  return;
712 
713  free_dmar_iommu(iommu);
714 
715  if (iommu->reg)
716  unmap_iommu(iommu);
717 
718  kfree(iommu);
719 }
720 
721 /*
722  * Reclaim all the submitted descriptors which have completed its work.
723  */
724 static inline void reclaim_free_desc(struct q_inval *qi)
725 {
726  while (qi->desc_status[qi->free_tail] == QI_DONE ||
727  qi->desc_status[qi->free_tail] == QI_ABORT) {
728  qi->desc_status[qi->free_tail] = QI_FREE;
729  qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
730  qi->free_cnt++;
731  }
732 }
733 
734 static int qi_check_fault(struct intel_iommu *iommu, int index)
735 {
736  u32 fault;
737  int head, tail;
738  struct q_inval *qi = iommu->qi;
739  int wait_index = (index + 1) % QI_LENGTH;
740 
741  if (qi->desc_status[wait_index] == QI_ABORT)
742  return -EAGAIN;
743 
744  fault = readl(iommu->reg + DMAR_FSTS_REG);
745 
746  /*
747  * If IQE happens, the head points to the descriptor associated
748  * with the error. No new descriptors are fetched until the IQE
749  * is cleared.
750  */
751  if (fault & DMA_FSTS_IQE) {
752  head = readl(iommu->reg + DMAR_IQH_REG);
753  if ((head >> DMAR_IQ_SHIFT) == index) {
754  pr_err("VT-d detected invalid descriptor: "
755  "low=%llx, high=%llx\n",
756  (unsigned long long)qi->desc[index].low,
757  (unsigned long long)qi->desc[index].high);
758  memcpy(&qi->desc[index], &qi->desc[wait_index],
759  sizeof(struct qi_desc));
760  __iommu_flush_cache(iommu, &qi->desc[index],
761  sizeof(struct qi_desc));
762  writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
763  return -EINVAL;
764  }
765  }
766 
767  /*
768  * If ITE happens, all pending wait_desc commands are aborted.
769  * No new descriptors are fetched until the ITE is cleared.
770  */
771  if (fault & DMA_FSTS_ITE) {
772  head = readl(iommu->reg + DMAR_IQH_REG);
773  head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
774  head |= 1;
775  tail = readl(iommu->reg + DMAR_IQT_REG);
776  tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
777 
778  writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
779 
780  do {
781  if (qi->desc_status[head] == QI_IN_USE)
782  qi->desc_status[head] = QI_ABORT;
783  head = (head - 2 + QI_LENGTH) % QI_LENGTH;
784  } while (head != tail);
785 
786  if (qi->desc_status[wait_index] == QI_ABORT)
787  return -EAGAIN;
788  }
789 
790  if (fault & DMA_FSTS_ICE)
791  writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
792 
793  return 0;
794 }
795 
796 /*
797  * Submit the queued invalidation descriptor to the remapping
798  * hardware unit and wait for its completion.
799  */
800 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
801 {
802  int rc;
803  struct q_inval *qi = iommu->qi;
804  struct qi_desc *hw, wait_desc;
805  int wait_index, index;
806  unsigned long flags;
807 
808  if (!qi)
809  return 0;
810 
811  hw = qi->desc;
812 
813 restart:
814  rc = 0;
815 
816  raw_spin_lock_irqsave(&qi->q_lock, flags);
817  while (qi->free_cnt < 3) {
818  raw_spin_unlock_irqrestore(&qi->q_lock, flags);
819  cpu_relax();
820  raw_spin_lock_irqsave(&qi->q_lock, flags);
821  }
822 
823  index = qi->free_head;
824  wait_index = (index + 1) % QI_LENGTH;
825 
826  qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
827 
828  hw[index] = *desc;
829 
830  wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
831  QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
832  wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
833 
834  hw[wait_index] = wait_desc;
835 
836  __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
837  __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
838 
839  qi->free_head = (qi->free_head + 2) % QI_LENGTH;
840  qi->free_cnt -= 2;
841 
842  /*
843  * update the HW tail register indicating the presence of
844  * new descriptors.
845  */
846  writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
847 
848  while (qi->desc_status[wait_index] != QI_DONE) {
849  /*
850  * We will leave the interrupts disabled, to prevent interrupt
851  * context to queue another cmd while a cmd is already submitted
852  * and waiting for completion on this cpu. This is to avoid
853  * a deadlock where the interrupt context can wait indefinitely
854  * for free slots in the queue.
855  */
856  rc = qi_check_fault(iommu, index);
857  if (rc)
858  break;
859 
860  raw_spin_unlock(&qi->q_lock);
861  cpu_relax();
862  raw_spin_lock(&qi->q_lock);
863  }
864 
865  qi->desc_status[index] = QI_DONE;
866 
867  reclaim_free_desc(qi);
868  raw_spin_unlock_irqrestore(&qi->q_lock, flags);
869 
870  if (rc == -EAGAIN)
871  goto restart;
872 
873  return rc;
874 }
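/*
 * Editor's note -- illustrative sketch, not part of dmar.c.  Each submission
 * consumes two queue slots: the caller's descriptor plus a wait descriptor
 * whose status write to qi->desc_status[wait_index] signals completion.  A
 * caller therefore only builds the one descriptor it cares about, e.g.
 *
 *	struct qi_desc desc;
 *
 *	desc.low = QI_IEC_TYPE;		(global interrupt entry cache flush)
 *	desc.high = 0;
 *	qi_submit_sync(&desc, iommu);
 *
 * which is exactly what qi_global_iec() below does.
 */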
875 
876 /*
877  * Flush the global interrupt entry cache.
878  */
879 void qi_global_iec(struct intel_iommu *iommu)
880 {
881  struct qi_desc desc;
882 
883  desc.low = QI_IEC_TYPE;
884  desc.high = 0;
885 
886  /* should never fail */
887  qi_submit_sync(&desc, iommu);
888 }
889 
890 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
891  u64 type)
892 {
893  struct qi_desc desc;
894 
895  desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
896  | QI_CC_GRAN(type) | QI_CC_TYPE;
897  desc.high = 0;
898 
899  qi_submit_sync(&desc, iommu);
900 }
901 
902 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
903  unsigned int size_order, u64 type)
904 {
905  u8 dw = 0, dr = 0;
906 
907  struct qi_desc desc;
908  int ih = 0;
909 
910  if (cap_write_drain(iommu->cap))
911  dw = 1;
912 
913  if (cap_read_drain(iommu->cap))
914  dr = 1;
915 
916  desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
917  | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
918  desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
919  | QI_IOTLB_AM(size_order);
920 
921  qi_submit_sync(&desc, iommu);
922 }
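/*
 * Editor's note -- illustrative sketch with hypothetical values, not part of
 * dmar.c.  size_order is the order (log2) of the number of 4KiB pages being
 * invalidated, encoded via QI_IOTLB_AM(); flushing a 64KiB range (16 pages)
 * of domain "did" at a suitably aligned address "addr" would look like
 *
 *	qi_flush_iotlb(iommu, did, addr, 4, DMA_TLB_PSI_FLUSH);
 *
 * while a whole-domain flush passes DMA_TLB_DSI_FLUSH with addr and
 * size_order set to 0.
 */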
923 
924 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
925  u64 addr, unsigned mask)
926 {
927  struct qi_desc desc;
928 
929  if (mask) {
930  BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
931  addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
932  desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
933  } else
934  desc.high = QI_DEV_IOTLB_ADDR(addr);
935 
936  if (qdep >= QI_DEV_IOTLB_MAX_INVS)
937  qdep = 0;
938 
939  desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
940  QI_DIOTLB_TYPE;
941 
942  qi_submit_sync(&desc, iommu);
943 }
944 
945 /*
946  * Disable Queued Invalidation interface.
947  */
948 void dmar_disable_qi(struct intel_iommu *iommu)
949 {
950  unsigned long flags;
951  u32 sts;
952  cycles_t start_time = get_cycles();
953 
954  if (!ecap_qis(iommu->ecap))
955  return;
956 
957  raw_spin_lock_irqsave(&iommu->register_lock, flags);
958 
959  sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
960  if (!(sts & DMA_GSTS_QIES))
961  goto end;
962 
963  /*
964  * Give a chance to HW to complete the pending invalidation requests.
965  */
966  while ((readl(iommu->reg + DMAR_IQT_REG) !=
967  readl(iommu->reg + DMAR_IQH_REG)) &&
968  (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
969  cpu_relax();
970 
971  iommu->gcmd &= ~DMA_GCMD_QIE;
972  writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
973 
974  IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
975  !(sts & DMA_GSTS_QIES), sts);
976 end:
977  raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
978 }
979 
980 /*
981  * Enable queued invalidation.
982  */
983 static void __dmar_enable_qi(struct intel_iommu *iommu)
984 {
985  u32 sts;
986  unsigned long flags;
987  struct q_inval *qi = iommu->qi;
988 
989  qi->free_head = qi->free_tail = 0;
990  qi->free_cnt = QI_LENGTH;
991 
992  raw_spin_lock_irqsave(&iommu->register_lock, flags);
993 
994  /* write zero to the tail reg */
995  writel(0, iommu->reg + DMAR_IQT_REG);
996 
997  dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
998 
999  iommu->gcmd |= DMA_GCMD_QIE;
1000  writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1001 
1002  /* Make sure hardware complete it */
1003  IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1004 
1005  raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1006 }
1007 
1008 /*
1009  * Enable Queued Invalidation interface. This is a must to support
1010  * interrupt-remapping. Also used by DMA-remapping, which replaces
1011  * register based IOTLB invalidation.
1012  */
1013 int dmar_enable_qi(struct intel_iommu *iommu)
1014 {
1015  struct q_inval *qi;
1016  struct page *desc_page;
1017 
1018  if (!ecap_qis(iommu->ecap))
1019  return -ENOENT;
1020 
1021  /*
1022  * queued invalidation is already setup and enabled.
1023  */
1024  if (iommu->qi)
1025  return 0;
1026 
1027  iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1028  if (!iommu->qi)
1029  return -ENOMEM;
1030 
1031  qi = iommu->qi;
1032 
1033 
1034  desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
1035  if (!desc_page) {
1036  kfree(qi);
1037  iommu->qi = 0;
1038  return -ENOMEM;
1039  }
1040 
1041  qi->desc = page_address(desc_page);
1042 
1043  qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1044  if (!qi->desc_status) {
1045  free_page((unsigned long) qi->desc);
1046  kfree(qi);
1047  iommu->qi = 0;
1048  return -ENOMEM;
1049  }
1050 
1051  qi->free_head = qi->free_tail = 0;
1052  qi->free_cnt = QI_LENGTH;
1053 
1054  raw_spin_lock_init(&qi->q_lock);
1055 
1056  __dmar_enable_qi(iommu);
1057 
1058  return 0;
1059 }
1060 
1061 /* iommu interrupt handling. Most stuff are MSI-like. */
1062 
1063 enum faulttype {
1064  DMA_REMAP,
1065  INTR_REMAP,
1066  UNKNOWN,
1067 };
1068 
1069 static const char *dma_remap_fault_reasons[] =
1070 {
1071  "Software",
1072  "Present bit in root entry is clear",
1073  "Present bit in context entry is clear",
1074  "Invalid context entry",
1075  "Access beyond MGAW",
1076  "PTE Write access is not set",
1077  "PTE Read access is not set",
1078  "Next page table ptr is invalid",
1079  "Root table address invalid",
1080  "Context table ptr is invalid",
1081  "non-zero reserved fields in RTP",
1082  "non-zero reserved fields in CTP",
1083  "non-zero reserved fields in PTE",
1084 };
1085 
1086 static const char *irq_remap_fault_reasons[] =
1087 {
1088  "Detected reserved fields in the decoded interrupt-remapped request",
1089  "Interrupt index exceeded the interrupt-remapping table size",
1090  "Present field in the IRTE entry is clear",
1091  "Error accessing interrupt-remapping table pointed by IRTA_REG",
1092  "Detected reserved fields in the IRTE entry",
1093  "Blocked a compatibility format interrupt request",
1094  "Blocked an interrupt request due to source-id verification failure",
1095 };
1096 
1097 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
1098 
1099 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1100 {
1101  if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1102  ARRAY_SIZE(irq_remap_fault_reasons))) {
1103  *fault_type = INTR_REMAP;
1104  return irq_remap_fault_reasons[fault_reason - 0x20];
1105  } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1106  *fault_type = DMA_REMAP;
1107  return dma_remap_fault_reasons[fault_reason];
1108  } else {
1109  *fault_type = UNKNOWN;
1110  return "Unknown";
1111  }
1112 }
1113 
1114 void dmar_msi_unmask(struct irq_data *data)
1115 {
1116  struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1117  unsigned long flag;
1118 
1119  /* unmask it */
1120  raw_spin_lock_irqsave(&iommu->register_lock, flag);
1121  writel(0, iommu->reg + DMAR_FECTL_REG);
1122  /* Read a reg to force flush the post write */
1123  readl(iommu->reg + DMAR_FECTL_REG);
1124  raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1125 }
1126 
1127 void dmar_msi_mask(struct irq_data *data)
1128 {
1129  unsigned long flag;
1130  struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1131 
1132  /* mask it */
1133  raw_spin_lock_irqsave(&iommu->register_lock, flag);
1134  writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1135  /* Read a reg to force flush the post write */
1136  readl(iommu->reg + DMAR_FECTL_REG);
1137  raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1138 }
1139 
1140 void dmar_msi_write(int irq, struct msi_msg *msg)
1141 {
1142  struct intel_iommu *iommu = irq_get_handler_data(irq);
1143  unsigned long flag;
1144 
1145  raw_spin_lock_irqsave(&iommu->register_lock, flag);
1146  writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1147  writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1148  writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1149  raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1150 }
1151 
1152 void dmar_msi_read(int irq, struct msi_msg *msg)
1153 {
1154  struct intel_iommu *iommu = irq_get_handler_data(irq);
1155  unsigned long flag;
1156 
1157  raw_spin_lock_irqsave(&iommu->register_lock, flag);
1158  msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1159  msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1160  msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1161  raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1162 }
1163 
1164 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1165  u8 fault_reason, u16 source_id, unsigned long long addr)
1166 {
1167  const char *reason;
1168  int fault_type;
1169 
1170  reason = dmar_get_fault_reason(fault_reason, &fault_type);
1171 
1172  if (fault_type == INTR_REMAP)
1173  pr_err("INTR-REMAP: Request device [[%02x:%02x.%d] "
1174  "fault index %llx\n"
1175  "INTR-REMAP:[fault reason %02d] %s\n",
1176  (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1177  PCI_FUNC(source_id & 0xFF), addr >> 48,
1178  fault_reason, reason);
1179  else
1180  pr_err("DMAR:[%s] Request device [%02x:%02x.%d] "
1181  "fault addr %llx \n"
1182  "DMAR:[fault reason %02d] %s\n",
1183  (type ? "DMA Read" : "DMA Write"),
1184  (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1185  PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1186  return 0;
1187 }
1188 
1189 #define PRIMARY_FAULT_REG_LEN (16)
1190 irqreturn_t dmar_fault(int irq, void *dev_id)
1191 {
1192  struct intel_iommu *iommu = dev_id;
1193  int reg, fault_index;
1194  u32 fault_status;
1195  unsigned long flag;
1196 
1197  raw_spin_lock_irqsave(&iommu->register_lock, flag);
1198  fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1199  if (fault_status)
1200  pr_err("DRHD: handling fault status reg %x\n", fault_status);
1201 
1202  /* TBD: ignore advanced fault log currently */
1203  if (!(fault_status & DMA_FSTS_PPF))
1204  goto clear_rest;
1205 
1206  fault_index = dma_fsts_fault_record_index(fault_status);
1207  reg = cap_fault_reg_offset(iommu->cap);
1208  while (1) {
1209  u8 fault_reason;
1210  u16 source_id;
1211  u64 guest_addr;
1212  int type;
1213  u32 data;
1214 
1215  /* highest 32 bits */
1216  data = readl(iommu->reg + reg +
1217  fault_index * PRIMARY_FAULT_REG_LEN + 12);
1218  if (!(data & DMA_FRCD_F))
1219  break;
1220 
1221  fault_reason = dma_frcd_fault_reason(data);
1222  type = dma_frcd_type(data);
1223 
1224  data = readl(iommu->reg + reg +
1225  fault_index * PRIMARY_FAULT_REG_LEN + 8);
1226  source_id = dma_frcd_source_id(data);
1227 
1228  guest_addr = dmar_readq(iommu->reg + reg +
1229  fault_index * PRIMARY_FAULT_REG_LEN);
1230  guest_addr = dma_frcd_page_addr(guest_addr);
1231  /* clear the fault */
1232  writel(DMA_FRCD_F, iommu->reg + reg +
1233  fault_index * PRIMARY_FAULT_REG_LEN + 12);
1234 
1235  raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1236 
1237  dmar_fault_do_one(iommu, type, fault_reason,
1238  source_id, guest_addr);
1239 
1240  fault_index++;
1241  if (fault_index >= cap_num_fault_regs(iommu->cap))
1242  fault_index = 0;
1243  raw_spin_lock_irqsave(&iommu->register_lock, flag);
1244  }
1245 clear_rest:
1246  /* clear all the other faults */
1247  fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1248  writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1249 
1250  raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1251  return IRQ_HANDLED;
1252 }
1253 
1254 int dmar_set_interrupt(struct intel_iommu *iommu)
1255 {
1256  int irq, ret;
1257 
1258  /*
1259  * Check if the fault interrupt is already initialized.
1260  */
1261  if (iommu->irq)
1262  return 0;
1263 
1264  irq = create_irq();
1265  if (!irq) {
1266  pr_err("IOMMU: no free vectors\n");
1267  return -EINVAL;
1268  }
1269 
1270  irq_set_handler_data(irq, iommu);
1271  iommu->irq = irq;
1272 
1273  ret = arch_setup_dmar_msi(irq);
1274  if (ret) {
1275  irq_set_handler_data(irq, NULL);
1276  iommu->irq = 0;
1277  destroy_irq(irq);
1278  return ret;
1279  }
1280 
1281  ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1282  if (ret)
1283  pr_err("IOMMU: can't request irq\n");
1284  return ret;
1285 }
1286 
1287 int __init enable_drhd_fault_handling(void)
1288 {
1289  struct dmar_drhd_unit *drhd;
1290 
1291  /*
1292  * Enable fault control interrupt.
1293  */
1294  for_each_drhd_unit(drhd) {
1295  int ret;
1296  struct intel_iommu *iommu = drhd->iommu;
1297  ret = dmar_set_interrupt(iommu);
1298 
1299  if (ret) {
1300  pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
1301  (unsigned long long)drhd->reg_base_addr, ret);
1302  return -1;
1303  }
1304 
1305  /*
1306  * Clear any previous faults.
1307  */
1308  dmar_fault(iommu->irq, iommu);
1309  }
1310 
1311  return 0;
1312 }
1313 
1314 /*
1315  * Re-enable Queued Invalidation interface.
1316  */
1317 int dmar_reenable_qi(struct intel_iommu *iommu)
1318 {
1319  if (!ecap_qis(iommu->ecap))
1320  return -ENOENT;
1321 
1322  if (!iommu->qi)
1323  return -ENOENT;
1324 
1325  /*
1326  * First disable queued invalidation.
1327  */
1328  dmar_disable_qi(iommu);
1329  /*
1330  * Then enable queued invalidation again. Since there is no pending
1331  * invalidation requests now, it's safe to re-enable queued
1332  * invalidation.
1333  */
1334  __dmar_enable_qi(iommu);
1335 
1336  return 0;
1337 }
1338 
1339 /*
1340  * Check interrupt remapping support in DMAR table description.
1341  */
1342 int __init dmar_ir_support(void)
1343 {
1344  struct acpi_table_dmar *dmar;
1345  dmar = (struct acpi_table_dmar *)dmar_tbl;
1346  if (!dmar)
1347  return 0;
1348  return dmar->flags & 0x1;
1349 }
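/*
 * Editor's note -- illustrative sketch, not part of dmar.c.  Bit 0 of the ACPI
 * DMAR table flags is the INTR_REMAP capability bit, so a caller gates
 * interrupt-remapping setup on this helper, roughly:
 *
 *	if (dmar_ir_support())
 *		(proceed with interrupt-remapping initialization)
 */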