Linux Kernel 3.7.1
pci.c
/*
 * Support PCI/PCIe on PowerNV platforms
 *
 * Currently supports only P5IOC2
 *
 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>

#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/firmware.h>

#include "powernv.h"
#include "pci.h"

/* Delay in usec */
#define PCI_RESET_DELAY_US      3000000

#define cfg_dbg(fmt...) do { } while(0)
//#define cfg_dbg(fmt...) printk(fmt)

#ifdef CONFIG_PCI_MSI
static int pnv_msi_check_device(struct pci_dev* pdev, int nvec, int type)
{
        struct pci_controller *hose = pci_bus_to_host(pdev->bus);
        struct pnv_phb *phb = hose->private_data;

        return (phb && phb->msi_map) ? 0 : -ENODEV;
}

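/*
 * Allocate one hardware MSI from the PHB bitmap under phb->lock: scan
 * from phb->msi_next, retrying once from 0 when the first scan comes up
 * empty. Returns phb->msi_base + bit index, or 0 when the bitmap is
 * full (which assumes a valid msi_base is never 0).
 */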
static unsigned int pnv_get_one_msi(struct pnv_phb *phb)
{
        unsigned long flags;
        unsigned int id, rc;

        spin_lock_irqsave(&phb->lock, flags);

        id = find_next_zero_bit(phb->msi_map, phb->msi_count, phb->msi_next);
        if (id >= phb->msi_count && phb->msi_next)
                id = find_next_zero_bit(phb->msi_map, phb->msi_count, 0);
        if (id >= phb->msi_count) {
                rc = 0;
                goto out;
        }
        __set_bit(id, phb->msi_map);
        rc = id + phb->msi_base;
out:
        spin_unlock_irqrestore(&phb->lock, flags);
        return rc;
}

static void pnv_put_msi(struct pnv_phb *phb, unsigned int hwirq)
{
        unsigned long flags;
        unsigned int id;

        if (WARN_ON(hwirq < phb->msi_base ||
                    hwirq >= (phb->msi_base + phb->msi_count)))
                return;
        id = hwirq - phb->msi_base;

        spin_lock_irqsave(&phb->lock, flags);
        __clear_bit(id, phb->msi_map);
        spin_unlock_irqrestore(&phb->lock, flags);
}

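/*
 * ppc_md hook for allocating MSIs. For each MSI descriptor on the
 * device: reject 32-bit MSIs if the PHB lacks 32-bit support, take a
 * hardware irq from the bitmap, map it to a Linux virq, then have the
 * PHB backend (phb->msi_setup) compose the MSI message that is written
 * to the device. Each failure path releases whatever the current entry
 * had already acquired.
 */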
static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
        struct pci_controller *hose = pci_bus_to_host(pdev->bus);
        struct pnv_phb *phb = hose->private_data;
        struct msi_desc *entry;
        struct msi_msg msg;
        unsigned int hwirq, virq;
        int rc;

        if (WARN_ON(!phb))
                return -ENODEV;

        list_for_each_entry(entry, &pdev->msi_list, list) {
                if (!entry->msi_attrib.is_64 && !phb->msi32_support) {
                        pr_warn("%s: Supports only 64-bit MSIs\n",
                                pci_name(pdev));
                        return -ENXIO;
                }
                hwirq = pnv_get_one_msi(phb);
                if (!hwirq) {
                        pr_warn("%s: Failed to find a free MSI\n",
                                pci_name(pdev));
                        return -ENOSPC;
                }
                virq = irq_create_mapping(NULL, hwirq);
                if (virq == NO_IRQ) {
                        pr_warn("%s: Failed to map MSI to linux irq\n",
                                pci_name(pdev));
                        pnv_put_msi(phb, hwirq);
                        return -ENOMEM;
                }
                rc = phb->msi_setup(phb, pdev, hwirq, entry->msi_attrib.is_64,
                                    &msg);
                if (rc) {
                        pr_warn("%s: Failed to setup MSI\n", pci_name(pdev));
                        irq_dispose_mapping(virq);
                        pnv_put_msi(phb, hwirq);
                        return rc;
                }
                irq_set_msi_desc(virq, entry);
                write_msi_msg(virq, &msg);
        }
        return 0;
}

static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
{
        struct pci_controller *hose = pci_bus_to_host(pdev->bus);
        struct pnv_phb *phb = hose->private_data;
        struct msi_desc *entry;

        if (WARN_ON(!phb))
                return;

        list_for_each_entry(entry, &pdev->msi_list, list) {
                if (entry->irq == NO_IRQ)
                        continue;
                irq_set_msi_desc(entry->irq, NULL);
                pnv_put_msi(phb, virq_to_hw(entry->irq));
                irq_dispose_mapping(entry->irq);
        }
}
#endif /* CONFIG_PCI_MSI */

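/*
 * Dump the P7IOC diagnostic buffer that was fetched from OPAL into
 * phb->diag.p7ioc. PEST entries are only printed when bit 63 is set
 * in PESTA or PESTB, i.e. when the PE actually has state to report.
 */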
static void pnv_pci_dump_p7ioc_diag_data(struct pnv_phb *phb)
{
        struct OpalIoP7IOCPhbErrorData *data = &phb->diag.p7ioc;
        int i;

        pr_info("PHB %d diagnostic data:\n", phb->hose->global_number);

        pr_info("  brdgCtl              = 0x%08x\n", data->brdgCtl);

        pr_info("  portStatusReg        = 0x%08x\n", data->portStatusReg);
        pr_info("  rootCmplxStatus      = 0x%08x\n", data->rootCmplxStatus);
        pr_info("  busAgentStatus       = 0x%08x\n", data->busAgentStatus);

        pr_info("  deviceStatus         = 0x%08x\n", data->deviceStatus);
        pr_info("  slotStatus           = 0x%08x\n", data->slotStatus);
        pr_info("  linkStatus           = 0x%08x\n", data->linkStatus);
        pr_info("  devCmdStatus         = 0x%08x\n", data->devCmdStatus);
        pr_info("  devSecStatus         = 0x%08x\n", data->devSecStatus);

        pr_info("  rootErrorStatus      = 0x%08x\n", data->rootErrorStatus);
        pr_info("  uncorrErrorStatus    = 0x%08x\n", data->uncorrErrorStatus);
        pr_info("  corrErrorStatus      = 0x%08x\n", data->corrErrorStatus);
        pr_info("  tlpHdr1              = 0x%08x\n", data->tlpHdr1);
        pr_info("  tlpHdr2              = 0x%08x\n", data->tlpHdr2);
        pr_info("  tlpHdr3              = 0x%08x\n", data->tlpHdr3);
        pr_info("  tlpHdr4              = 0x%08x\n", data->tlpHdr4);
        pr_info("  sourceId             = 0x%08x\n", data->sourceId);

        pr_info("  errorClass           = 0x%016llx\n", data->errorClass);
        pr_info("  correlator           = 0x%016llx\n", data->correlator);

        pr_info("  p7iocPlssr           = 0x%016llx\n", data->p7iocPlssr);
        pr_info("  p7iocCsr             = 0x%016llx\n", data->p7iocCsr);
        pr_info("  lemFir               = 0x%016llx\n", data->lemFir);
        pr_info("  lemErrorMask         = 0x%016llx\n", data->lemErrorMask);
        pr_info("  lemWOF               = 0x%016llx\n", data->lemWOF);
        pr_info("  phbErrorStatus       = 0x%016llx\n", data->phbErrorStatus);
        pr_info("  phbFirstErrorStatus  = 0x%016llx\n", data->phbFirstErrorStatus);
        pr_info("  phbErrorLog0         = 0x%016llx\n", data->phbErrorLog0);
        pr_info("  phbErrorLog1         = 0x%016llx\n", data->phbErrorLog1);
        pr_info("  mmioErrorStatus      = 0x%016llx\n", data->mmioErrorStatus);
        pr_info("  mmioFirstErrorStatus = 0x%016llx\n", data->mmioFirstErrorStatus);
        pr_info("  mmioErrorLog0        = 0x%016llx\n", data->mmioErrorLog0);
        pr_info("  mmioErrorLog1        = 0x%016llx\n", data->mmioErrorLog1);
        pr_info("  dma0ErrorStatus      = 0x%016llx\n", data->dma0ErrorStatus);
        pr_info("  dma0FirstErrorStatus = 0x%016llx\n", data->dma0FirstErrorStatus);
        pr_info("  dma0ErrorLog0        = 0x%016llx\n", data->dma0ErrorLog0);
        pr_info("  dma0ErrorLog1        = 0x%016llx\n", data->dma0ErrorLog1);
        pr_info("  dma1ErrorStatus      = 0x%016llx\n", data->dma1ErrorStatus);
        pr_info("  dma1FirstErrorStatus = 0x%016llx\n", data->dma1FirstErrorStatus);
        pr_info("  dma1ErrorLog0        = 0x%016llx\n", data->dma1ErrorLog0);
        pr_info("  dma1ErrorLog1        = 0x%016llx\n", data->dma1ErrorLog1);

        for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
                if ((data->pestA[i] >> 63) == 0 &&
                    (data->pestB[i] >> 63) == 0)
                        continue;
                pr_info("  PE[%3d] PESTA        = 0x%016llx\n", i, data->pestA[i]);
                pr_info("          PESTB        = 0x%016llx\n", data->pestB[i]);
        }
}

static void pnv_pci_dump_phb_diag_data(struct pnv_phb *phb)
{
        switch(phb->model) {
        case PNV_PHB_MODEL_P7IOC:
                pnv_pci_dump_p7ioc_diag_data(phb);
                break;
        default:
                pr_warning("PCI %d: Can't decode this PHB diag data\n",
                           phb->hose->global_number);
        }
}

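/*
 * Handle a frozen PE found by the config accessors: grab the PHB diag
 * data, ask OPAL to clear the freeze, and only dump the diag buffer
 * when the clear fails (see the comment below about not spamming the
 * log while probing empty slots).
 */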
static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
{
        unsigned long flags, rc;
        int has_diag;

        spin_lock_irqsave(&phb->lock, flags);

        rc = opal_pci_get_phb_diag_data(phb->opal_id, phb->diag.blob,
                                        PNV_PCI_DIAG_BUF_SIZE);
        has_diag = (rc == OPAL_SUCCESS);

        rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
                                       OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
        if (rc) {
                pr_warning("PCI %d: Failed to clear EEH freeze state"
                           " for PE#%d, err %ld\n",
                           phb->hose->global_number, pe_no, rc);

                /* For now, let's only display the diag buffer when we fail
                 * to clear the EEH status. We'll do more sensible things
                 * later when we have proper EEH support. We need to make
                 * sure we don't pollute ourselves with the normal errors
                 * generated when probing empty slots.
                 */
                if (has_diag)
                        pnv_pci_dump_phb_diag_data(phb);
                else
                        pr_warning("PCI %d: No diag data available\n",
                                   phb->hose->global_number);
        }

        spin_unlock_irqrestore(&phb->lock, flags);
}

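/*
 * After each config access, read the EEH freeze state of the PE that
 * owns this bdfn (PE#0 when there is no IODA bdfn_to_pe mapping) and
 * kick off the recovery path above if it is frozen.
 */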
static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus,
                                     u32 bdfn)
{
        s64 rc;
        u8 fstate;
        u16 pcierr;
        u32 pe_no;

        /* Get PE# if we support IODA */
        pe_no = phb->bdfn_to_pe ? phb->bdfn_to_pe(phb, bus, bdfn & 0xff) : 0;

        /* Read freeze status */
        rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr,
                                        NULL);
        if (rc) {
                pr_warning("PCI %d: Failed to read EEH status for PE#%d,"
                           " err %lld\n", phb->hose->global_number, pe_no, rc);
                return;
        }
        cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n",
                bdfn, pe_no, fstate);
        if (fstate != 0)
                pnv_pci_handle_eeh_config(phb, pe_no);
}

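/*
 * Config space accessors, routed through OPAL. On an OPAL error a read
 * returns all-ones, the same pattern a master abort produces on
 * conventional PCI. Every access is followed by a freeze check, since
 * a frozen PHB also answers reads with all-ones rather than an error.
 */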
static int pnv_pci_read_config(struct pci_bus *bus,
                               unsigned int devfn,
                               int where, int size, u32 *val)
{
        struct pci_controller *hose = pci_bus_to_host(bus);
        struct pnv_phb *phb = hose->private_data;
        u32 bdfn = (((uint64_t)bus->number) << 8) | devfn;
        s64 rc;

        if (hose == NULL)
                return PCIBIOS_DEVICE_NOT_FOUND;

        switch (size) {
        case 1: {
                u8 v8;
                rc = opal_pci_config_read_byte(phb->opal_id, bdfn, where, &v8);
                *val = (rc == OPAL_SUCCESS) ? v8 : 0xff;
                break;
        }
        case 2: {
                u16 v16;
                rc = opal_pci_config_read_half_word(phb->opal_id, bdfn, where,
                                                    &v16);
                *val = (rc == OPAL_SUCCESS) ? v16 : 0xffff;
                break;
        }
        case 4: {
                u32 v32;
                rc = opal_pci_config_read_word(phb->opal_id, bdfn, where, &v32);
                *val = (rc == OPAL_SUCCESS) ? v32 : 0xffffffff;
                break;
        }
        default:
                return PCIBIOS_FUNC_NOT_SUPPORTED;
        }
        cfg_dbg("pnv_pci_read_config bus: %x devfn: %x +%x/%x -> %08x\n",
                bus->number, devfn, where, size, *val);

        /* Check if the PHB got frozen due to an error (no response) */
        pnv_pci_config_check_eeh(phb, bus, bdfn);

        return PCIBIOS_SUCCESSFUL;
}

static int pnv_pci_write_config(struct pci_bus *bus,
                                unsigned int devfn,
                                int where, int size, u32 val)
{
        struct pci_controller *hose = pci_bus_to_host(bus);
        struct pnv_phb *phb = hose->private_data;
        u32 bdfn = (((uint64_t)bus->number) << 8) | devfn;

        if (hose == NULL)
                return PCIBIOS_DEVICE_NOT_FOUND;

        cfg_dbg("pnv_pci_write_config bus: %x devfn: %x +%x/%x -> %08x\n",
                bus->number, devfn, where, size, val);
        switch (size) {
        case 1:
                opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
                break;
        case 2:
                opal_pci_config_write_half_word(phb->opal_id, bdfn, where, val);
                break;
        case 4:
                opal_pci_config_write_word(phb->opal_id, bdfn, where, val);
                break;
        default:
                return PCIBIOS_FUNC_NOT_SUPPORTED;
        }
        /* Check if the PHB got frozen due to an error (no response) */
        pnv_pci_config_check_eeh(phb, bus, bdfn);

        return PCIBIOS_SUCCESSFUL;
}

struct pci_ops pnv_pci_ops = {
        .read = pnv_pci_read_config,
        .write = pnv_pci_write_config,
};

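/*
 * Flush the hardware TCE cache for a just-modified range. it_index
 * holds the ioremapped address of the invalidation register (set up in
 * pnv_pci_setup_bml_iommu below); one word is written per "inc" step,
 * with the encoding chosen by it_busno / TCE_PCI_SWINV_PAIR.
 */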
static void pnv_tce_invalidate(struct iommu_table *tbl,
                               u64 *startp, u64 *endp)
{
        u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
        unsigned long start, end, inc;

        start = __pa(startp);
        end = __pa(endp);

        /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
        if (tbl->it_busno) {
                start <<= 12;
                end <<= 12;
                inc = 128 << 12;
                start |= tbl->it_busno;
                end |= tbl->it_busno;
        }
        /* p7ioc-style invalidation, 2 TCEs per write */
        else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
                start |= (1ull << 63);
                end |= (1ull << 63);
                inc = 16;
        }
        /* Default (older HW) */
        else
                inc = 128;

        end |= inc - 1; /* round up end to be different from start */

        mb(); /* Ensure above stores are visible */
        while (start <= end) {
                __raw_writeq(start, invalidate);
                start += inc;
        }
        /* The iommu layer will do another mb() for us on build() and
         * we don't care on free()
         */
}

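/*
 * Build TCEs for a new DMA mapping: every 64-bit entry carries the
 * real page number plus a read-permission bit, and a write bit unless
 * the mapping is DMA_TO_DEVICE only. The SWINV_CREATE flush is only
 * needed on hardware that caches TCEs.
 */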
static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
                         unsigned long uaddr, enum dma_data_direction direction,
                         struct dma_attrs *attrs)
{
        u64 proto_tce;
        u64 *tcep, *tces;
        u64 rpn;

        proto_tce = TCE_PCI_READ; // Read allowed

        if (direction != DMA_TO_DEVICE)
                proto_tce |= TCE_PCI_WRITE;

        tces = tcep = ((u64 *)tbl->it_base) + index - tbl->it_offset;
        rpn = __pa(uaddr) >> TCE_SHIFT;

        while (npages--)
                *(tcep++) = proto_tce | (rpn++ << TCE_RPN_SHIFT);

        /* Some implementations won't cache invalid TCEs and thus may not
         * need that flush. We'll probably turn it_type into a bit mask
         * of flags if that becomes the case
         */
        if (tbl->it_type & TCE_PCI_SWINV_CREATE)
                pnv_tce_invalidate(tbl, tces, tcep - 1);

        return 0;
}

static void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
{
        u64 *tcep, *tces;

        tces = tcep = ((u64 *)tbl->it_base) + index - tbl->it_offset;

        while (npages--)
                *(tcep++) = 0;

        if (tbl->it_type & TCE_PCI_SWINV_FREE)
                pnv_tce_invalidate(tbl, tces, tcep - 1);
}

static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
{
        return ((u64 *)tbl->it_base)[index - tbl->it_offset];
}

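/*
 * Initialize an iommu_table over a linear TCE area: it_size counts
 * 8-byte TCE entries (hence tce_size >> 3) and it_offset is the DMA
 * offset converted to IOMMU pages.
 */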
void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
                               void *tce_mem, u64 tce_size,
                               u64 dma_offset)
{
        tbl->it_blocksize = 16;
        tbl->it_base = (unsigned long)tce_mem;
        tbl->it_offset = dma_offset >> IOMMU_PAGE_SHIFT;
        tbl->it_index = 0;
        tbl->it_size = tce_size >> 3;
        tbl->it_busno = 0;
        tbl->it_type = TCE_PCI;
}

static struct iommu_table * __devinit
pnv_pci_setup_bml_iommu(struct pci_controller *hose)
{
        struct iommu_table *tbl;
        const __be64 *basep, *swinvp;
        const __be32 *sizep;

        basep = of_get_property(hose->dn, "linux,tce-base", NULL);
        sizep = of_get_property(hose->dn, "linux,tce-size", NULL);
        if (basep == NULL || sizep == NULL) {
                pr_err("PCI: %s has missing tce entries !\n",
                       hose->dn->full_name);
                return NULL;
        }
        tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, hose->node);
        if (WARN_ON(!tbl))
                return NULL;
        pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
                                  be32_to_cpup(sizep), 0);
        iommu_init_table(tbl, hose->node);

        /* Deal with SW invalidated TCEs when needed (BML way) */
        swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info",
                                 NULL);
        if (swinvp) {
                tbl->it_busno = swinvp[1];
                tbl->it_index = (unsigned long)ioremap(swinvp[0], 8);
                tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
        }
        return tbl;
}

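/*
 * Fallback DMA setup when no pnv_phb backend claims the device: build
 * an iommu_table once from the "linux,tce-*" properties on the PHB
 * node and attach it to the device.
 */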
static void __devinit pnv_pci_dma_fallback_setup(struct pci_controller *hose,
                                                 struct pci_dev *pdev)
{
        struct device_node *np = pci_bus_to_OF_node(hose->bus);
        struct pci_dn *pdn;

        if (np == NULL)
                return;
        pdn = PCI_DN(np);
        if (!pdn->iommu_table)
                pdn->iommu_table = pnv_pci_setup_bml_iommu(hose);
        if (!pdn->iommu_table)
                return;
        set_iommu_table_base(&pdev->dev, pdn->iommu_table);
}

static void __devinit pnv_pci_dma_dev_setup(struct pci_dev *pdev)
{
        struct pci_controller *hose = pci_bus_to_host(pdev->bus);
        struct pnv_phb *phb = hose->private_data;

        /* If we have no phb structure, try to setup a fallback based on
         * the device-tree (RTAS PCI for example)
         */
        if (phb && phb->dma_dev_setup)
                phb->dma_dev_setup(phb, pdev);
        else
                pnv_pci_dma_fallback_setup(hose, pdev);
}

/* Fixup wrong class code in p7ioc root complex */
static void __devinit pnv_p7ioc_rc_quirk(struct pci_dev *dev)
{
        dev->class = PCI_CLASS_BRIDGE_PCI << 8;
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);

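/*
 * Ensure PCI_RESET_DELAY_US (3s) has elapsed since firmware lifted the
 * bus reset, using the timebase stamp firmware left in the device
 * tree. If, say, only 1s has passed, this sleeps roughly 2000ms more;
 * the "+ 999" rounds the remaining microseconds up to whole ms.
 */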
static int pnv_pci_probe_mode(struct pci_bus *bus)
{
        struct pci_controller *hose = pci_bus_to_host(bus);
        const __be64 *tstamp;
        u64 now, target;

        /* We hijack this as a way to ensure we have waited long
         * enough since the reset was lifted on the PCI bus
         */
        if (bus != hose->bus)
                return PCI_PROBE_NORMAL;
        tstamp = of_get_property(hose->dn, "reset-clear-timestamp", NULL);
        if (!tstamp || !*tstamp)
                return PCI_PROBE_NORMAL;

        now = mftb() / tb_ticks_per_usec;
        target = (be64_to_cpup(tstamp) / tb_ticks_per_usec)
                + PCI_RESET_DELAY_US;

        pr_devel("pci %04d: Reset target: 0x%llx now: 0x%llx\n",
                 hose->global_number, target, now);

        if (now < target)
                msleep((target - now + 999) / 1000);

        return PCI_PROBE_NORMAL;
}

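/*
 * Platform entry point: probe IODA or p5ioc2 IO hubs when OPAL is
 * present (falling back to RTAS-style probing otherwise), then wire
 * the PowerNV DMA, probe-mode and MSI hooks into ppc_md.
 */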
void __init pnv_pci_init(void)
{
        struct device_node *np;

        pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN);

        /* OPAL absent, try POPAL first then RTAS detection of PHBs */
        if (!firmware_has_feature(FW_FEATURE_OPAL)) {
#ifdef CONFIG_PPC_POWERNV_RTAS
                init_pci_config_tokens();
                find_and_init_phbs();
#endif /* CONFIG_PPC_POWERNV_RTAS */
        }
        /* OPAL is here, do our normal stuff */
        else {
                int found_ioda = 0;

                /* Look for IODA IO-Hubs. We don't support mixing IODA
                 * and p5ioc2 due to the need to change some global
                 * probing flags
                 */
                for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
                        pnv_pci_init_ioda_hub(np);
                        found_ioda = 1;
                }

                /* Look for p5ioc2 IO-Hubs */
                if (!found_ioda)
                        for_each_compatible_node(np, NULL, "ibm,p5ioc2")
                                pnv_pci_init_p5ioc2_hub(np);
        }

        /* Setup the linkage between OF nodes and PHBs */
        pci_devs_phb_init();

        /* Configure IOMMU DMA hooks */
        ppc_md.pci_dma_dev_setup = pnv_pci_dma_dev_setup;
        ppc_md.tce_build = pnv_tce_build;
        ppc_md.tce_free = pnv_tce_free;
        ppc_md.tce_get = pnv_tce_get;
        ppc_md.pci_probe_mode = pnv_pci_probe_mode;
        set_pci_dma_ops(&dma_iommu_ops);

        /* Configure MSIs */
#ifdef CONFIG_PCI_MSI
        ppc_md.msi_check_device = pnv_msi_check_device;
        ppc_md.setup_msi_irqs = pnv_setup_msi_irqs;
        ppc_md.teardown_msi_irqs = pnv_teardown_msi_irqs;
#endif
}