pci-ioda.c
/*
 * Support PCI/PCIe on PowerNV platforms
 *
 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>

#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>

#include "powernv.h"
#include "pci.h"

static int __pe_printk(const char *level, const struct pnv_ioda_pe *pe,
                       struct va_format *vaf)
{
    char pfix[32];

    if (pe->pdev)
        strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
    else
        sprintf(pfix, "%04x:%02x     ",
            pci_domain_nr(pe->pbus), pe->pbus->number);
    return printk("pci %s%s: [PE# %.3d] %pV", level, pfix, pe->pe_number, vaf);
}

#define define_pe_printk_level(func, kern_level)        \
static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...) \
{                                                       \
    struct va_format vaf;                               \
    va_list args;                                       \
    int r;                                              \
                                                        \
    va_start(args, fmt);                                \
                                                        \
    vaf.fmt = fmt;                                      \
    vaf.va = &args;                                     \
                                                        \
    r = __pe_printk(kern_level, pe, &vaf);              \
    va_end(args);                                       \
                                                        \
    return r;                                           \
}                                                       \

define_pe_printk_level(pe_err, KERN_ERR);
define_pe_printk_level(pe_warn, KERN_WARNING);
define_pe_printk_level(pe_info, KERN_INFO);

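/*
 * The macro above generates pe_err(), pe_warn() and pe_info(), which are
 * used throughout this file. They behave like printk() at the given level
 * but prefix the message with the device name (or domain/bus number) and
 * the PE number, e.g. (illustrative only):
 *
 *	pe_info(pe, "Assigned %d DMA32 segments\n", segs);
 */
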
static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev)
{
    struct device_node *np;

    np = pci_device_to_OF_node(dev);
    if (!np)
        return NULL;
    return PCI_DN(np);
}

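/*
 * Allocate a free PE number from the PHB's PE allocation bitmap.
 * Returns IODA_INVALID_PE once all of ioda.total_pe are in use.
 */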
static int __devinit pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
    unsigned long pe;

    do {
        pe = find_next_zero_bit(phb->ioda.pe_alloc,
                    phb->ioda.total_pe, 0);
        if (pe >= phb->ioda.total_pe)
            return IODA_INVALID_PE;
    } while(test_and_set_bit(pe, phb->ioda.pe_alloc));

    phb->ioda.pe_array[pe].pe_number = pe;
    return pe;
}

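/* Release a PE number back to the PHB's PE allocation bitmap. */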
static void __devinit pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
{
    WARN_ON(phb->ioda.pe_array[pe].pdev);

    memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
    clear_bit(pe, phb->ioda.pe_alloc);
}

/* Currently these are only used when MSIs are enabled; this will change,
 * but in the meantime we need to protect them to avoid warnings
 */
#ifdef CONFIG_PCI_MSI
static struct pnv_ioda_pe * __devinit pnv_ioda_get_pe(struct pci_dev *dev)
{
    struct pci_controller *hose = pci_bus_to_host(dev->bus);
    struct pnv_phb *phb = hose->private_data;
    struct pci_dn *pdn = pnv_ioda_get_pdn(dev);

    if (!pdn)
        return NULL;
    if (pdn->pe_number == IODA_INVALID_PE)
        return NULL;
    return &phb->ioda.pe_array[pdn->pe_number];
}
#endif /* CONFIG_PCI_MSI */

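/*
 * Push a PE's configuration down to OPAL: program the RID match
 * (bus/device/function comparators) into the PELT, add the PE to the
 * PELT-V of all its parent bridges, fill the RID -> PE# reverse map
 * and, on IODA1, set up and enable an MVE for MSI routing.
 */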
static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb,
                       struct pnv_ioda_pe *pe)
{
    struct pci_dev *parent;
    uint8_t bcomp, dcomp, fcomp;
    long rc, rid_end, rid;

    /* Bus validation ? */
    if (pe->pbus) {
        int count;

        dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
        fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
        parent = pe->pbus->self;
        if (pe->flags & PNV_IODA_PE_BUS_ALL)
            count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
        else
            count = 1;

        switch(count) {
        case  1: bcomp = OpalPciBusAll;     break;
        case  2: bcomp = OpalPciBus7Bits;   break;
        case  4: bcomp = OpalPciBus6Bits;   break;
        case  8: bcomp = OpalPciBus5Bits;   break;
        case 16: bcomp = OpalPciBus4Bits;   break;
        case 32: bcomp = OpalPciBus3Bits;   break;
        default:
            pr_err("%s: Number of subordinate busses %d"
                   " unsupported\n",
                   pci_name(pe->pbus->self), count);
            /* Do an exact match only */
            bcomp = OpalPciBusAll;
        }
        rid_end = pe->rid + (count << 8);
    } else {
        parent = pe->pdev->bus->self;
        bcomp = OpalPciBusAll;
        dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
        fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
        rid_end = pe->rid + 1;
    }

    /* Associate PE in PELT */
    rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
                 bcomp, dcomp, fcomp, OPAL_MAP_PE);
    if (rc) {
        pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
        return -ENXIO;
    }
    opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
                  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);

    /* Add to all parents PELT-V */
    while (parent) {
        struct pci_dn *pdn = pnv_ioda_get_pdn(parent);
        if (pdn && pdn->pe_number != IODA_INVALID_PE) {
            rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
                        pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
            /* XXX What to do in case of error ? */
        }
        parent = parent->bus->self;
    }
    /* Setup reverse map */
    for (rid = pe->rid; rid < rid_end; rid++)
        phb->ioda.pe_rmap[rid] = pe->pe_number;

    /* Set up one MVE on IODA1 */
    if (phb->type == PNV_PHB_IODA1) {
        pe->mve_number = pe->pe_number;
        rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
                      pe->pe_number);
        if (rc) {
            pe_err(pe, "OPAL error %ld setting up MVE %d\n",
                   rc, pe->mve_number);
            pe->mve_number = -1;
        } else {
            rc = opal_pci_set_mve_enable(phb->opal_id,
                             pe->mve_number, OPAL_ENABLE_MVE);
            if (rc) {
                pe_err(pe, "OPAL error %ld enabling MVE %d\n",
                       rc, pe->mve_number);
                pe->mve_number = -1;
            }
        }
    } else if (phb->type == PNV_PHB_IODA2)
        pe->mve_number = 0;

    return 0;
}

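/*
 * Insert the PE into the PHB's DMA list, which is kept sorted by
 * descending DMA weight so that heavier PEs are served first when
 * the 32-bit DMA segments are handed out.
 */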
static void __devinit pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
                         struct pnv_ioda_pe *pe)
{
    struct pnv_ioda_pe *lpe;

    list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) {
        if (lpe->dma_weight < pe->dma_weight) {
            list_add_tail(&pe->dma_link, &lpe->dma_link);
            return;
        }
    }
    list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list);
}

static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
{
    /* This is quite simplistic. The "base" weight of a device
     * is 10. 0 means no DMA is to be accounted for it.
     */

    /* If it's a bridge, no DMA */
    if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
        return 0;

    /* Reduce the weight of slow USB controllers */
    if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
        dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
        dev->class == PCI_CLASS_SERIAL_USB_EHCI)
        return 3;

    /* Increase the weight of RAID (includes Obsidian) */
    if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
        return 15;

    /* Default */
    return 10;
}

#if 0
static struct pnv_ioda_pe * __devinit pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
    struct pci_controller *hose = pci_bus_to_host(dev->bus);
    struct pnv_phb *phb = hose->private_data;
    struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
    struct pnv_ioda_pe *pe;
    int pe_num;

    if (!pdn) {
        pr_err("%s: Device tree node not associated properly\n",
               pci_name(dev));
        return NULL;
    }
    if (pdn->pe_number != IODA_INVALID_PE)
        return NULL;

    /* PE#0 has been pre-set */
    if (dev->bus->number == 0)
        pe_num = 0;
    else
        pe_num = pnv_ioda_alloc_pe(phb);
    if (pe_num == IODA_INVALID_PE) {
        pr_warning("%s: Not enough PE# available, disabling device\n",
               pci_name(dev));
        return NULL;
    }

    /* NOTE: We get only one ref to the pci_dev for the pdn, not for the
     * pointer in the PE data structure, both should be destroyed at the
     * same time. However, this needs to be looked at more closely again
     * once we actually start removing things (Hotplug, SR-IOV, ...)
     *
     * At some point we want to remove the PDN completely anyways
     */
    pe = &phb->ioda.pe_array[pe_num];
    pci_dev_get(dev);
    pdn->pcidev = dev;
    pdn->pe_number = pe_num;
    pe->pdev = dev;
    pe->pbus = NULL;
    pe->tce32_seg = -1;
    pe->mve_number = -1;
    pe->rid = dev->bus->number << 8 | pdn->devfn;

    pe_info(pe, "Associated device to PE\n");

    if (pnv_ioda_configure_pe(phb, pe)) {
        /* XXX What do we do here ? */
        if (pe_num)
            pnv_ioda_free_pe(phb, pe_num);
        pdn->pe_number = IODA_INVALID_PE;
        pe->pdev = NULL;
        pci_dev_put(dev);
        return NULL;
    }

    /* Assign a DMA weight to the device */
    pe->dma_weight = pnv_ioda_dma_weight(dev);
    if (pe->dma_weight != 0) {
        phb->ioda.dma_weight += pe->dma_weight;
        phb->ioda.dma_pe_count++;
    }

    /* Link the PE */
    pnv_ioda_link_pe_by_weight(phb, pe);

    return pe;
}
#endif /* Useful for SRIOV case */

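/*
 * Associate every device on the bus (and, for PNV_IODA_PE_BUS_ALL PEs,
 * on all its subordinate buses) with the given PE, accumulating the
 * devices' DMA weights into the PE.
 */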
static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
{
    struct pci_dev *dev;

    list_for_each_entry(dev, &bus->devices, bus_list) {
        struct pci_dn *pdn = pnv_ioda_get_pdn(dev);

        if (pdn == NULL) {
            pr_warn("%s: No device node associated with device !\n",
                pci_name(dev));
            continue;
        }
        pci_dev_get(dev);
        pdn->pcidev = dev;
        pdn->pe_number = pe->pe_number;
        pe->dma_weight += pnv_ioda_dma_weight(dev);
        if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
            pnv_ioda_setup_same_PE(dev->subordinate, pe);
    }
}

/*
 * There are two types of PCI-bus-sensitive PEs: one that comprises a
 * single PCI bus, and another that contains the primary PCI bus and its
 * subordinate PCI devices and buses. The second type of PE is normally
 * originated by a PCIe-to-PCI bridge or a PLX switch downstream port.
 */
static void __devinit pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
{
    struct pci_controller *hose = pci_bus_to_host(bus);
    struct pnv_phb *phb = hose->private_data;
    struct pnv_ioda_pe *pe;
    int pe_num;

    pe_num = pnv_ioda_alloc_pe(phb);
    if (pe_num == IODA_INVALID_PE) {
        pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
            __func__, pci_domain_nr(bus), bus->number);
        return;
    }

    pe = &phb->ioda.pe_array[pe_num];
    pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
    pe->pbus = bus;
    pe->pdev = NULL;
    pe->tce32_seg = -1;
    pe->mve_number = -1;
    pe->rid = bus->busn_res.start << 8;
    pe->dma_weight = 0;

    if (all)
        pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n",
            bus->busn_res.start, bus->busn_res.end, pe_num);
    else
        pe_info(pe, "Secondary bus %d associated with PE#%d\n",
            bus->busn_res.start, pe_num);

    if (pnv_ioda_configure_pe(phb, pe)) {
        /* XXX What do we do here ? */
        if (pe_num)
            pnv_ioda_free_pe(phb, pe_num);
        pe->pbus = NULL;
        return;
    }

    /* Associate it with all child devices */
    pnv_ioda_setup_same_PE(bus, pe);

    /* Put PE to the list */
    list_add_tail(&pe->list, &phb->ioda.pe_list);

    /* Account for one DMA PE if at least one DMA capable device exists
     * below the bridge
     */
    if (pe->dma_weight != 0) {
        phb->ioda.dma_weight += pe->dma_weight;
        phb->ioda.dma_pe_count++;
    }

    /* Link the PE */
    pnv_ioda_link_pe_by_weight(phb, pe);
}

static void __devinit pnv_ioda_setup_PEs(struct pci_bus *bus)
{
    struct pci_dev *dev;

    pnv_ioda_setup_bus_PE(bus, 0);

    list_for_each_entry(dev, &bus->devices, bus_list) {
        if (dev->subordinate) {
            if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE)
                pnv_ioda_setup_bus_PE(dev->subordinate, 1);
            else
                pnv_ioda_setup_PEs(dev->subordinate);
        }
    }
}

/*
 * Configure the PEs so that the downstream PCI buses and devices
 * have their associated PE#. Unfortunately, we haven't figured out
 * a way to identify PLX bridges yet, so we simply put the PCI bus
 * and everything subordinate to the root port into a PE here. These
 * rules are expected to change once we can detect PLX bridges
 * correctly.
 */
static void __devinit pnv_pci_ioda_setup_PEs(void)
{
    struct pci_controller *hose, *tmp;

    list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
        pnv_ioda_setup_PEs(hose->bus);
    }
}

static void __devinit pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb,
                         struct pci_dev *dev)
{
    /* We delay DMA setup until after we have assigned all PE# */
}

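/* Point every device below the bus at the PE's 32-bit TCE table. */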
static void __devinit pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
                         struct pci_bus *bus)
{
    struct pci_dev *dev;

    list_for_each_entry(dev, &bus->devices, bus_list) {
        set_iommu_table_base(&dev->dev, &pe->tce32_table);
        if (dev->subordinate)
            pnv_ioda_setup_bus_dma(pe, dev->subordinate);
    }
}

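/*
 * Set up the 32-bit DMA window of a PE: allocate one contiguous TCE
 * table covering "segs" 256MB segments starting at segment "base",
 * program each segment into the PHB through OPAL, then register the
 * resulting iommu_table with Linux (including the P7IOC software
 * TCE-invalidate variant when the device tree advertises it).
 */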
static void __devinit pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
                        struct pnv_ioda_pe *pe,
                        unsigned int base,
                        unsigned int segs)
{

    struct page *tce_mem = NULL;
    const __be64 *swinvp;
    struct iommu_table *tbl;
    unsigned int i;
    int64_t rc;
    void *addr;

    /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
#define TCE32_TABLE_SIZE    ((0x10000000 / 0x1000) * 8)

    /* XXX FIXME: Handle 64-bit only DMA devices */
    /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
    /* XXX FIXME: Allocate multi-level tables on PHB3 */

    /* We shouldn't already have a 32-bit DMA associated */
    if (WARN_ON(pe->tce32_seg >= 0))
        return;

    /* Grab a 32-bit TCE table */
    pe->tce32_seg = base;
    pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
        (base << 28), ((base + segs) << 28) - 1);

    /* XXX Currently, we allocate one big contiguous table for the
     * TCEs. We only really need one chunk per 256M of TCE space
     * (ie per segment) but that's an optimization for later, it
     * requires some added smarts with our get/put_tce implementation
     */
    tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
                   get_order(TCE32_TABLE_SIZE * segs));
    if (!tce_mem) {
        pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
        goto fail;
    }
    addr = page_address(tce_mem);
    memset(addr, 0, TCE32_TABLE_SIZE * segs);

    /* Configure HW */
    for (i = 0; i < segs; i++) {
        rc = opal_pci_map_pe_dma_window(phb->opal_id,
                        pe->pe_number,
                        base + i, 1,
                        __pa(addr) + TCE32_TABLE_SIZE * i,
                        TCE32_TABLE_SIZE, 0x1000);
        if (rc) {
            pe_err(pe, " Failed to configure 32-bit TCE table,"
                   " err %ld\n", rc);
            goto fail;
        }
    }

    /* Setup linux iommu table */
    tbl = &pe->tce32_table;
    pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
                  base << 28);

    /* OPAL variant of P7IOC SW invalidated TCEs */
    swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
    if (swinvp) {
        /* We need a couple more fields -- an address and a data
         * to or. Since the bus is only printed out on table free
         * errors, and on the first pass the data will be a relative
         * bus number, print that out instead.
         */
        tbl->it_busno = 0;
        tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
        tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE
            | TCE_PCI_SWINV_PAIR;
    }
    iommu_init_table(tbl, phb->hose->node);

    if (pe->pdev)
        set_iommu_table_base(&pe->pdev->dev, tbl);
    else
        pnv_ioda_setup_bus_dma(pe, pe->pbus);

    return;
 fail:
    /* XXX Failure: Try to fallback to 64-bit only ? */
    if (pe->tce32_seg >= 0)
        pe->tce32_seg = -1;
    if (tce_mem)
        __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
}

static void __devinit pnv_ioda_setup_dma(struct pnv_phb *phb)
{
    struct pci_controller *hose = phb->hose;
    unsigned int residual, remaining, segs, tw, base;
    struct pnv_ioda_pe *pe;

    /* If we have more PE# than segments available, hand out one
     * per PE until we run out and let the rest fail. If not,
     * then we assign at least one segment per PE, plus more based
     * on the amount of devices under that PE
     */
    if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
        residual = 0;
    else
        residual = phb->ioda.tce32_count -
            phb->ioda.dma_pe_count;

    pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
        hose->global_number, phb->ioda.tce32_count);
    pr_info("PCI: %d PE# for a total weight of %d\n",
        phb->ioda.dma_pe_count, phb->ioda.dma_weight);

    /* Walk our PE list and configure their DMA segments, hand them
     * out one base segment plus any residual segments based on
     * weight
     */
    remaining = phb->ioda.tce32_count;
    tw = phb->ioda.dma_weight;
    base = 0;
    list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
        if (!pe->dma_weight)
            continue;
        if (!remaining) {
            pe_warn(pe, "No DMA32 resources available\n");
            continue;
        }
        segs = 1;
        if (residual) {
            segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
            if (segs > remaining)
                segs = remaining;
        }
        pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
            pe->dma_weight, segs);
        pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
        remaining -= segs;
        base += segs;
    }
}

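/*
 * Worked example of the residual distribution above, with made-up
 * numbers: given tce32_count = 16 segments, dma_pe_count = 4 and a
 * total weight tw = 40, residual = 12; a PE of weight 10 then gets
 * segs = 1 + (10 * 12 + 40/2) / 40 = 1 + 3 = 4 of the 16 segments.
 */
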
#ifdef CONFIG_PCI_MSI
static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
                  unsigned int hwirq, unsigned int is_64,
                  struct msi_msg *msg)
{
    struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
    unsigned int xive_num = hwirq - phb->msi_base;
    uint64_t addr64;
    uint32_t addr32, data;
    int rc;

    /* No PE assigned ? bail out ... no MSI for you ! */
    if (pe == NULL)
        return -ENXIO;

    /* Check if we have an MVE */
    if (pe->mve_number < 0)
        return -ENXIO;

    /* Assign XIVE to PE */
    rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
    if (rc) {
        pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
            pci_name(dev), rc, xive_num);
        return -EIO;
    }

    if (is_64) {
        rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
                     &addr64, &data);
        if (rc) {
            pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
                pci_name(dev), rc);
            return -EIO;
        }
        msg->address_hi = addr64 >> 32;
        msg->address_lo = addr64 & 0xfffffffful;
    } else {
        rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
                     &addr32, &data);
        if (rc) {
            pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
                pci_name(dev), rc);
            return -EIO;
        }
        msg->address_hi = 0;
        msg->address_lo = addr32;
    }
    msg->data = data;

    pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
         " address=%x_%08x data=%x PE# %d\n",
         pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
         msg->address_hi, msg->address_lo, data, pe->pe_number);

    return 0;
}

static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
    unsigned int bmap_size;
    const __be32 *prop = of_get_property(phb->hose->dn,
                         "ibm,opal-msi-ranges", NULL);
    if (!prop) {
        /* BML Fallback */
        prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
    }
    if (!prop)
        return;

    phb->msi_base = be32_to_cpup(prop);
    phb->msi_count = be32_to_cpup(prop + 1);
    bmap_size = BITS_TO_LONGS(phb->msi_count) * sizeof(unsigned long);
    phb->msi_map = zalloc_maybe_bootmem(bmap_size, GFP_KERNEL);
    if (!phb->msi_map) {
        pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
               phb->hose->global_number);
        return;
    }
    phb->msi_setup = pnv_pci_ioda_msi_setup;
    phb->msi32_support = 1;
    pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
        phb->msi_count, phb->msi_base);
}
#else
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
#endif /* CONFIG_PCI_MSI */

/*
 * This function is supposed to be called per PE, from top to
 * bottom, so that the I/O or MMIO segment assigned to a parent
 * PE can be overridden by its child PEs if necessary.
 */
static void __devinit pnv_ioda_setup_pe_seg(struct pci_controller *hose,
                        struct pnv_ioda_pe *pe)
{
    struct pnv_phb *phb = hose->private_data;
    struct pci_bus_region region;
    struct resource *res;
    int i, index;
    int rc;

    /*
     * NOTE: We only care about PCI-bus-based PEs for now. PCI
     * device based PEs, for example SRIOV-sensitive VFs, should
     * be figured out later.
     */
    BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));

    pci_bus_for_each_resource(pe->pbus, res, i) {
        if (!res || !res->flags ||
            res->start > res->end)
            continue;

        if (res->flags & IORESOURCE_IO) {
            region.start = res->start - phb->ioda.io_pci_base;
            region.end = res->end - phb->ioda.io_pci_base;
            index = region.start / phb->ioda.io_segsize;

            while (index < phb->ioda.total_pe &&
                   region.start <= region.end) {
                phb->ioda.io_segmap[index] = pe->pe_number;
                rc = opal_pci_map_pe_mmio_window(phb->opal_id,
                    pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
                if (rc != OPAL_SUCCESS) {
                    pr_err("%s: OPAL error %d when mapping IO "
                           "segment #%d to PE#%d\n",
                           __func__, rc, index, pe->pe_number);
                    break;
                }

                region.start += phb->ioda.io_segsize;
                index++;
            }
        } else if (res->flags & IORESOURCE_MEM) {
            region.start = res->start -
                       hose->pci_mem_offset -
                       phb->ioda.m32_pci_base;
            region.end = res->end -
                     hose->pci_mem_offset -
                     phb->ioda.m32_pci_base;
            index = region.start / phb->ioda.m32_segsize;

            while (index < phb->ioda.total_pe &&
                   region.start <= region.end) {
                phb->ioda.m32_segmap[index] = pe->pe_number;
                rc = opal_pci_map_pe_mmio_window(phb->opal_id,
                    pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
                if (rc != OPAL_SUCCESS) {
                    pr_err("%s: OPAL error %d when mapping M32 "
                           "segment#%d to PE#%d",
                           __func__, rc, index, pe->pe_number);
                    break;
                }

                region.start += phb->ioda.m32_segsize;
                index++;
            }
        }
    }
}

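/*
 * Example with made-up numbers: if io_segsize is 64KB and a bus I/O
 * resource starts 0x30000 into the PHB's I/O space, the loop above
 * starts at segment index 3 and claims one segment per iteration
 * until the end of the resource, mapping each one to the PE via OPAL.
 */
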
static void __devinit pnv_pci_ioda_setup_seg(void)
{
    struct pci_controller *tmp, *hose;
    struct pnv_phb *phb;
    struct pnv_ioda_pe *pe;

    list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
        phb = hose->private_data;
        list_for_each_entry(pe, &phb->ioda.pe_list, list) {
            pnv_ioda_setup_pe_seg(hose, pe);
        }
    }
}

static void __devinit pnv_pci_ioda_setup_DMA(void)
{
    struct pci_controller *hose, *tmp;
    struct pnv_phb *phb;

    list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
        pnv_ioda_setup_dma(hose->private_data);

        /* Mark the PHB initialization done */
        phb = hose->private_data;
        phb->initialized = 1;
    }
}

static void __devinit pnv_pci_ioda_fixup(void)
{
    pnv_pci_ioda_setup_PEs();
    pnv_pci_ioda_setup_seg();
    pnv_pci_ioda_setup_DMA();
}

/*
 * Returns the alignment for I/O or memory windows for P2P
 * bridges. That actually depends on how PEs are segmented.
 * For now, we return the I/O or M32 segment size for PE-sensitive
 * P2P bridges. Otherwise, the default values (4KiB for I/O,
 * 1MiB for memory) will be returned.
 *
 * The current PCI bus might be put into one PE, which was
 * created against the parent PCI bridge. In that case, we
 * needn't enlarge the alignment, which saves some resources.
 */
static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
                        unsigned long type)
{
    struct pci_dev *bridge;
    struct pci_controller *hose = pci_bus_to_host(bus);
    struct pnv_phb *phb = hose->private_data;
    int num_pci_bridges = 0;

    bridge = bus->self;
    while (bridge) {
        if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
            num_pci_bridges++;
            if (num_pci_bridges >= 2)
                return 1;
        }

        bridge = bridge->bus->self;
    }

    /* We need to support prefetchable memory windows later */
    if (type & IORESOURCE_MEM)
        return phb->ioda.m32_segsize;

    return phb->ioda.io_segsize;
}

/* Prevent enabling devices for which we couldn't properly
 * assign a PE
 */
static int __devinit pnv_pci_enable_device_hook(struct pci_dev *dev)
{
    struct pci_controller *hose = pci_bus_to_host(dev->bus);
    struct pnv_phb *phb = hose->private_data;
    struct pci_dn *pdn;

    /* This function is probably called while the PEs have
     * not been created yet, for example during resource
     * reassignment in the PCI probe period. Just skip the
     * check if the PEs aren't ready.
     */
    if (!phb->initialized)
        return 0;

    pdn = pnv_ioda_get_pdn(dev);
    if (!pdn || pdn->pe_number == IODA_INVALID_PE)
        return -EINVAL;

    return 0;
}

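/* Fast RID (bus << 8 | devfn) -> PE# lookup through the reverse map. */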
static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
                   u32 devfn)
{
    return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}

void __init pnv_pci_init_ioda1_phb(struct device_node *np)
{
    struct pci_controller *hose;
    static int primary = 1;
    struct pnv_phb *phb;
    unsigned long size, m32map_off, iomap_off, pemap_off;
    const u64 *prop64;
    u64 phb_id;
    void *aux;
    long rc;

    pr_info(" Initializing IODA OPAL PHB %s\n", np->full_name);

    prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
    if (!prop64) {
        pr_err(" Missing \"ibm,opal-phbid\" property !\n");
        return;
    }
    phb_id = be64_to_cpup(prop64);
    pr_debug(" PHB-ID : 0x%016llx\n", phb_id);

    phb = alloc_bootmem(sizeof(struct pnv_phb));
    if (phb) {
        memset(phb, 0, sizeof(struct pnv_phb));
        phb->hose = hose = pcibios_alloc_controller(np);
    }
    if (!phb || !phb->hose) {
        pr_err("PCI: Failed to allocate PCI controller for %s\n",
               np->full_name);
        return;
    }

    spin_lock_init(&phb->lock);
    /* XXX Use device-tree */
    hose->first_busno = 0;
    hose->last_busno = 0xff;
    hose->private_data = phb;
    phb->opal_id = phb_id;
    phb->type = PNV_PHB_IODA1;

    /* Detect specific models for error handling */
    if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
        phb->model = PNV_PHB_MODEL_P7IOC;
    else
        phb->model = PNV_PHB_MODEL_UNKNOWN;

    /* We parse "ranges" now since we need to deduce the register base
     * from the IO base
     */
    pci_process_bridge_OF_ranges(phb->hose, np, primary);
    primary = 0;

    /* Magic formula from Milton */
    phb->regs = of_iomap(np, 0);
    if (phb->regs == NULL)
        pr_err(" Failed to map registers !\n");


    /* XXX This is hack-a-thon. This needs to be changed so that:
     *  - we obtain stuff like PE# etc... from device-tree
     *  - we properly re-allocate M32 ourselves
     *    (the OFW one isn't very good)
     */

    /* Initialize more IODA stuff */
    phb->ioda.total_pe = 128;

    phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
    /* OFW has already carved off the top 64K of M32 space (MSI space) */
    phb->ioda.m32_size += 0x10000;

    phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
    phb->ioda.m32_pci_base = hose->mem_resources[0].start -
        hose->pci_mem_offset;
    phb->ioda.io_size = hose->pci_io_size;
    phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
    phb->ioda.io_pci_base = 0; /* XXX calculate this ? */

    /* Allocate aux data & arrays */
    size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
    m32map_off = size;
    size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
    iomap_off = size;
    size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]);
    pemap_off = size;
    size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
    aux = alloc_bootmem(size);
    memset(aux, 0, size);
    phb->ioda.pe_alloc = aux;
    phb->ioda.m32_segmap = aux + m32map_off;
    phb->ioda.io_segmap = aux + iomap_off;
    phb->ioda.pe_array = aux + pemap_off;
    set_bit(0, phb->ioda.pe_alloc);

    INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
    INIT_LIST_HEAD(&phb->ioda.pe_list);

    /* Calculate how many 32-bit TCE segments we have */
    phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;

    /* Clear unusable m64 */
    hose->mem_resources[1].flags = 0;
    hose->mem_resources[1].start = 0;
    hose->mem_resources[1].end = 0;
    hose->mem_resources[2].flags = 0;
    hose->mem_resources[2].start = 0;
    hose->mem_resources[2].end = 0;

#if 0
    rc = opal_pci_set_phb_mem_window(opal->phb_id,
                     window_type,
                     window_num,
                     starting_real_address,
                     starting_pci_address,
                     segment_size);
#endif

    pr_info(" %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n",
        phb->ioda.total_pe,
        phb->ioda.m32_size, phb->ioda.m32_segsize,
        phb->ioda.io_size, phb->ioda.io_segsize);

    if (phb->regs) {
        pr_devel(" BUID     = 0x%016llx\n", in_be64(phb->regs + 0x100));
        pr_devel(" PHB2_CR  = 0x%016llx\n", in_be64(phb->regs + 0x160));
        pr_devel(" IO_BAR   = 0x%016llx\n", in_be64(phb->regs + 0x170));
        pr_devel(" IO_BAMR  = 0x%016llx\n", in_be64(phb->regs + 0x178));
        pr_devel(" IO_SAR   = 0x%016llx\n", in_be64(phb->regs + 0x180));
        pr_devel(" M32_BAR  = 0x%016llx\n", in_be64(phb->regs + 0x190));
        pr_devel(" M32_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x198));
        pr_devel(" M32_SAR  = 0x%016llx\n", in_be64(phb->regs + 0x1a0));
    }
    phb->hose->ops = &pnv_pci_ops;

    /* Setup RID -> PE mapping function */
    phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;

    /* Setup TCEs */
    phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;

    /* Setup MSI support */
    pnv_pci_init_ioda_msis(phb);

    /*
     * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
     * to let the PCI core do resource assignment. It's expected
     * that the PCI core will do correct I/O and MMIO alignment
     * for the P2P bridge BARs so that each PCI bus (excluding
     * the child P2P bridges) can form an individual PE.
     */
    ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
    ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
    ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
    pci_add_flags(PCI_REASSIGN_ALL_RSRC);

    /* Reset IODA tables to a clean state */
    rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
    if (rc)
        pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc);
    opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
}

void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
    struct device_node *phbn;
    const u64 *prop64;
    u64 hub_id;

    pr_info("Probing IODA IO-Hub %s\n", np->full_name);

    prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
    if (!prop64) {
        pr_err(" Missing \"ibm,opal-hubid\" property !\n");
        return;
    }
    hub_id = be64_to_cpup(prop64);
    pr_devel(" HUB-ID : 0x%016llx\n", hub_id);

    /* Count child PHBs */
    for_each_child_of_node(np, phbn) {
        /* Look for IODA1 PHBs */
        if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
            pnv_pci_init_ioda1_phb(phbn);
    }
}