ppc64: Set up PCI tree from Open Firmware device tree
author Paul Mackerras <paulus@samba.org>
Mon, 12 Sep 2005 07:17:36 +0000 (17:17 +1000)
committer Paul Mackerras <paulus@samba.org>
Mon, 12 Sep 2005 07:17:36 +0000 (17:17 +1000)
This adds code which gives us the option on ppc64 of instantiating the
PCI tree (the tree of pci_bus and pci_dev structs) from the Open
Firmware device tree rather than by probing PCI configuration space.
The OF device tree has a node for each PCI device and bridge in the
system, with properties that tell us what addresses the firmware has
configured for them and other details.

There are a couple of reasons why this is needed.  First, on systems
with a hypervisor, there is a PCI-PCI bridge per slot under the PCI
host bridges.  These PCI-PCI bridges have special isolation features
for virtualization.  We can't write to their config space, and we are
not supposed to be reading their config space either.  The firmware
tells us about the address ranges that they pass in the OF device
tree.

Secondly, on powermacs, the interrupt controller is in a PCI device
that may be behind a PCI-PCI bridge.  If we happened to take an
interrupt just at the point when the device or a bridge on the path to
it was disabled for probing, we would crash when we try to access the
interrupt controller.

I have implemented a platform-specific function which is called for
each PCI bridge (host or PCI-PCI) to say whether the code should look
in the device tree or use normal PCI probing for the devices under
that bridge.  On pSeries machines we use the device tree if we're
running under a hypervisor, otherwise we use normal probing.  On
powermacs we use normal probing for the AGP bridge, since the device
for the AGP bridge itself isn't shown in the device tree (at least on
my G5), and the device tree for everything else.

This has been tested on a dual G5 powermac, a partition on a POWER5
machine (running under the hypervisor), and a legacy iSeries
partition.

Signed-off-by: Paul Mackerras <paulus@samba.org>
arch/ppc64/kernel/pSeries_setup.c
arch/ppc64/kernel/pci.c
arch/ppc64/kernel/pmac_setup.c
include/asm-ppc64/machdep.h
include/asm-ppc64/pci-bridge.h

index 9490b6c..bfadccc 100644 (file)
@@ -590,6 +590,13 @@ static int pseries_shared_idle(void)
        return 0;
 }
 
+/*
+ * Select how the devices below a bus are instantiated: from the OF
+ * device tree when the platform has the PLATFORM_LPAR bit set, by
+ * normal PCI probing otherwise.  The bus argument is not examined.
+ */
+static int pSeries_pci_probe_mode(struct pci_bus *bus)
+{
+       return (systemcfg->platform & PLATFORM_LPAR) ?
+               PCI_PROBE_DEVTREE : PCI_PROBE_NORMAL;
+}
+
 struct machdep_calls __initdata pSeries_md = {
        .probe                  = pSeries_probe,
        .setup_arch             = pSeries_setup_arch,
@@ -597,6 +604,7 @@ struct machdep_calls __initdata pSeries_md = {
        .get_cpuinfo            = pSeries_get_cpuinfo,
        .log_error              = pSeries_log_error,
        .pcibios_fixup          = pSeries_final_fixup,
+       .pci_probe_mode         = pSeries_pci_probe_mode,
        .irq_bus_setup          = pSeries_irq_bus_setup,
        .restart                = rtas_restart,
        .power_off              = rtas_power_off,
index 8447dcc..861138a 100644 (file)
@@ -51,6 +51,10 @@ unsigned long io_page_mask;
 
 EXPORT_SYMBOL(io_page_mask);
 
+#ifdef CONFIG_PPC_MULTIPLATFORM
+static void fixup_resource(struct resource *res, struct pci_dev *dev);
+static void do_bus_setup(struct pci_bus *bus);
+#endif
 
 unsigned int pcibios_assign_all_busses(void)
 {
@@ -225,10 +229,287 @@ static void __init pcibios_claim_of_setup(void)
 }
 #endif
 
+#ifdef CONFIG_PPC_MULTIPLATFORM
+/*
+ * Return the first 32-bit cell of property @name on node @np, or @def
+ * if the property does not exist or is shorter than 4 bytes.
+ */
+static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
+{
+       int nbytes;
+       u32 *cell = (u32 *) get_property(np, name, &nbytes);
+
+       if (cell == NULL || nbytes < 4)
+               return def;
+       return *cell;
+}
+
+/*
+ * Turn the first cell of an OF PCI address into IORESOURCE_* flags.
+ *
+ * Bit 0x02000000 is treated as "memory space", with 0x40000000
+ * additionally marking it prefetchable; failing that, bit 0x01000000
+ * is treated as "I/O space".  Returns 0 when neither space bit is set.
+ */
+static unsigned int pci_parse_of_flags(u32 addr0)
+{
+       if (addr0 & 0x02000000) {
+               if (addr0 & 0x40000000)
+                       return IORESOURCE_MEM | IORESOURCE_PREFETCH;
+               return IORESOURCE_MEM;
+       }
+       if (addr0 & 0x01000000)
+               return IORESOURCE_IO;
+       return 0;
+}
+
+/* Join cells (i) and (i)+1 of a u32 array into a u64; cell (i) is the high word */
+#define GET_64BIT(prop, i)     ((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1])
+
+/*
+ * Fill in dev->resource[] from the "assigned-addresses" property of @node.
+ *
+ * The property is consumed in 20-byte (five u32) entries: cell 0 holds
+ * the address-space bits and, in its low byte, a config-space register
+ * number; cells 1-2 are the 64-bit base and cells 3-4 the 64-bit size.
+ * Entries with no recognised address space, a zero size, or a register
+ * number that is neither a BAR nor the device's ROM register are
+ * skipped.  Does nothing if the property is absent.
+ */
+static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev)
+{
+       u64 base, size;
+       unsigned int flags;
+       struct resource *res;
+       u32 *addrs, i;
+       int proplen;
+
+       addrs = (u32 *) get_property(node, "assigned-addresses", &proplen);
+       if (!addrs)
+               return;
+       /* a trailing partial entry (fewer than 20 bytes) is ignored */
+       for (; proplen >= 20; proplen -= 20, addrs += 5) {
+               flags = pci_parse_of_flags(addrs[0]);
+               if (!flags)
+                       continue;
+               base = GET_64BIT(addrs, 1);
+               size = GET_64BIT(addrs, 3);
+               if (!size)
+                       continue;
+               /* low byte of cell 0: the config register this entry is for */
+               i = addrs[0] & 0xff;
+               if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) {
+                       /* one resource slot per 4-byte BAR register */
+                       res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
+               } else if (i == dev->rom_base_reg) {
+                       res = &dev->resource[PCI_ROM_RESOURCE];
+                       flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+               } else {
+                       printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
+                       continue;
+               }
+               res->start = base;
+               res->end = base + size - 1;
+               res->flags = flags;
+               res->name = pci_name(dev);
+               /* apply the host bridge's I/O or memory offset */
+               fixup_resource(res, dev);
+       }
+}
+
+/*
+ * Build a struct pci_dev for device-tree node @node at @devfn on @bus.
+ *
+ * Vendor/device/subsystem IDs, class code, interrupt and address
+ * assignments are taken from the node's properties, after which the
+ * device is registered with pci_device_add().
+ *
+ * Returns the new device, or NULL if the allocation fails.
+ */
+static struct pci_dev *of_create_pci_dev(struct device_node *node,
+                                        struct pci_bus *bus, int devfn)
+{
+       struct pci_dev *dev;
+       const char *type;
+
+       dev = kmalloc(sizeof(struct pci_dev), GFP_KERNEL);
+       if (!dev)
+               return NULL;
+       type = get_property(node, "device_type", NULL);
+       /* substitute an empty string so the strcmp() calls below are safe */
+       if (type == NULL)
+               type = "";
+
+       memset(dev, 0, sizeof(struct pci_dev));
+       dev->bus = bus;
+       dev->sysdata = node;
+       dev->dev.parent = bus->bridge;
+       dev->dev.bus = &pci_bus_type;
+       dev->devfn = devfn;
+       dev->multifunction = 0;         /* maybe a lie? */
+
+       /* vendor/device fall back to 0xffff, subsystem IDs to 0 */
+       dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
+       dev->device = get_int_prop(node, "device-id", 0xffff);
+       dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
+       dev->subsystem_device = get_int_prop(node, "subsystem-id", 0);
+
+       dev->cfg_size = 256; /*pci_cfg_space_size(dev);*/
+
+       sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus),
+               dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
+       dev->class = get_int_prop(node, "class-code", 0);
+
+       dev->current_state = 4;         /* unknown power state */
+
+       /* the node's device_type decides which header type is recorded */
+       if (!strcmp(type, "pci")) {
+               /* a PCI-PCI bridge */
+               dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
+               dev->rom_base_reg = PCI_ROM_ADDRESS1;
+       } else if (!strcmp(type, "cardbus")) {
+               dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
+       } else {
+               dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
+               dev->rom_base_reg = PCI_ROM_ADDRESS;
+               dev->irq = NO_IRQ;
+               /* use the node's first interrupt and record it in config space */
+               if (node->n_intrs > 0) {
+                       dev->irq = node->intrs[0].line;
+                       pci_write_config_byte(dev, PCI_INTERRUPT_LINE,
+                                             dev->irq);
+               }
+       }
+
+       /* rom_base_reg must be set before this, it is compared against there */
+       pci_parse_of_addrs(node, dev);
+
+       pci_device_add(dev, bus);
+
+       /* XXX pci_scan_msi_device(dev); */
+
+       return dev;
+}
+
+static void of_scan_pci_bridge(struct device_node *node, struct pci_dev *dev);
+
+/*
+ * Instantiate a pci_dev for every child of device-tree node @node that
+ * has a usable "reg" property, descending into PCI-PCI and cardbus
+ * bridges, then run the per-bus setup hooks on @bus.
+ */
+static void __devinit of_scan_bus(struct device_node *node,
+                                 struct pci_bus *bus)
+{
+       struct device_node *child;
+
+       for (child = of_get_next_child(node, NULL); child != NULL;
+            child = of_get_next_child(node, child)) {
+               struct pci_dev *dev;
+               int reglen;
+               u32 *reg = (u32 *) get_property(child, "reg", &reglen);
+
+               /* children without at least 20 bytes of "reg" are skipped */
+               if (reg == NULL || reglen < 20)
+                       continue;
+
+               /* create a new pci_dev for this device; devfn is bits 8-15 */
+               dev = of_create_pci_dev(child, bus, (reg[0] >> 8) & 0xff);
+               if (dev == NULL)
+                       continue;
+
+               switch (dev->hdr_type) {
+               case PCI_HEADER_TYPE_BRIDGE:
+               case PCI_HEADER_TYPE_CARDBUS:
+                       of_scan_pci_bridge(child, dev);
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       do_bus_setup(bus);
+}
+
+/*
+ * Create the child bus behind bridge @dev from its device-tree node
+ * @node and populate it.
+ *
+ * Bus numbers come from "bus-range" and the bridge windows from
+ * "ranges".  If either property is unusable, or the bus cannot be
+ * created, an error is logged and nothing is scanned.  The devices on
+ * the new bus are then found through the device tree or by normal
+ * probing, as selected by ppc_md.pci_probe_mode().
+ */
+static void __devinit of_scan_pci_bridge(struct device_node *node,
+                                        struct pci_dev *dev)
+{
+       struct pci_bus *bus;
+       u32 *busrange, *ranges;
+       int len, i, mode;
+       struct resource *res;
+       unsigned int flags;
+       u64 size;
+
+       /* parse bus-range property */
+       /* it must be exactly two cells: secondary and subordinate bus number */
+       busrange = (u32 *) get_property(node, "bus-range", &len);
+       if (busrange == NULL || len != 8) {
+               printk(KERN_ERR "Can't get bus-range for PCI-PCI bridge %s\n",
+                      node->full_name);
+               return;
+       }
+       /* from here on, len is the byte length of "ranges" */
+       ranges = (u32 *) get_property(node, "ranges", &len);
+       if (ranges == NULL) {
+               printk(KERN_ERR "Can't get ranges for PCI-PCI bridge %s\n",
+                      node->full_name);
+               return;
+       }
+
+       bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+       if (!bus) {
+               printk(KERN_ERR "Failed to create pci bus for %s\n",
+                      node->full_name);
+               return;
+       }
+
+       bus->primary = dev->bus->number;
+       bus->subordinate = busrange[1];
+       bus->bridge_ctl = 0;
+       bus->sysdata = node;
+
+       /* parse ranges property */
+       /* PCI #address-cells == 3 and #size-cells == 2 always */
+       /* point the bus resources at the bridge's window slots, all unused */
+       res = &dev->resource[PCI_BRIDGE_RESOURCES];
+       for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
+               res->flags = 0;
+               bus->resource[i] = res;
+               ++res;
+       }
+       /* slot 0 is kept for the I/O window; memory windows start at slot 1 */
+       i = 1;
+       /* each entry is 8 cells: flags, 64-bit address in 1-2, size in 6-7 */
+       for (; len >= 32; len -= 32, ranges += 8) {
+               flags = pci_parse_of_flags(ranges[0]);
+               size = GET_64BIT(ranges, 6);
+               if (flags == 0 || size == 0)
+                       continue;
+               if (flags & IORESOURCE_IO) {
+                       res = bus->resource[0];
+                       /* only the first I/O range is kept */
+                       if (res->flags) {
+                               printk(KERN_ERR "PCI: ignoring extra I/O range"
+                                      " for bridge %s\n", node->full_name);
+                               continue;
+                       }
+               } else {
+                       if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
+                               printk(KERN_ERR "PCI: too many memory ranges"
+                                      " for bridge %s\n", node->full_name);
+                               continue;
+                       }
+                       res = bus->resource[i];
+                       ++i;
+               }
+               res->start = GET_64BIT(ranges, 1);
+               res->end = res->start + size - 1;
+               res->flags = flags;
+               fixup_resource(res, dev);
+       }
+       sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
+               bus->number);
+
+       /* any mode other than DEVTREE or NORMAL leaves the bus unscanned */
+       mode = PCI_PROBE_NORMAL;
+       if (ppc_md.pci_probe_mode)
+               mode = ppc_md.pci_probe_mode(bus);
+       if (mode == PCI_PROBE_DEVTREE)
+               of_scan_bus(node, bus);
+       else if (mode == PCI_PROBE_NORMAL)
+               pci_scan_child_bus(bus);
+}
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+/*
+ * Create and populate the root bus for host bridge @hose.
+ *
+ * The hose's I/O resource and its three memory resources become the
+ * bus resources and are requested from ioport_resource and
+ * iomem_resource; a failed request is only logged.  The devices below
+ * the bridge are then instantiated from the device tree or by normal
+ * probing, depending on ppc_md.pci_probe_mode(), and finally added
+ * with pci_bus_add_devices().  If the bus cannot be created, an error
+ * is logged and the hose is left without a bus.
+ */
+static void __devinit scan_phb(struct pci_controller *hose)
+{
+       struct pci_bus *bus;
+       struct device_node *node = hose->arch_data;
+       int i, mode;
+       struct resource *res;
+
+       bus = pci_create_bus(NULL, hose->first_busno, hose->ops, node);
+       if (bus == NULL) {
+               printk(KERN_ERR "Failed to create bus for PCI domain %04x\n",
+                      hose->global_number);
+               return;
+       }
+       bus->secondary = hose->first_busno;
+       hose->bus = bus;
+
+       /* bus resource 0 is the I/O window, 1..3 the memory windows */
+       bus->resource[0] = res = &hose->io_resource;
+       if (res->flags && request_resource(&ioport_resource, res))
+               printk(KERN_ERR "Failed to request PCI IO region "
+                      "on PCI domain %04x\n", hose->global_number);
+
+       for (i = 0; i < 3; ++i) {
+               res = &hose->mem_resources[i];
+               bus->resource[i+1] = res;
+               if (res->flags && request_resource(&iomem_resource, res))
+                       printk(KERN_ERR "Failed to request PCI memory region "
+                              "on PCI domain %04x\n", hose->global_number);
+       }
+
+       /* without CONFIG_PPC_MULTIPLATFORM the mode is always NORMAL */
+       mode = PCI_PROBE_NORMAL;
+#ifdef CONFIG_PPC_MULTIPLATFORM
+       if (ppc_md.pci_probe_mode)
+               mode = ppc_md.pci_probe_mode(bus);
+       if (mode == PCI_PROBE_DEVTREE) {
+               /* subordinate number comes from the hose, not from a scan */
+               bus->subordinate = hose->last_busno;
+               of_scan_bus(node, bus);
+       }
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+       /* here the scan's return value becomes the last bus number */
+       if (mode == PCI_PROBE_NORMAL)
+               hose->last_busno = bus->subordinate = pci_scan_child_bus(bus);
+       pci_bus_add_devices(bus);
+}
+
 static int __init pcibios_init(void)
 {
        struct pci_controller *hose, *tmp;
-       struct pci_bus *bus;
 
        /* For now, override phys_mem_access_prot. If we need it,
         * later, we may move that initialization to each ppc_md
@@ -242,13 +523,8 @@ static int __init pcibios_init(void)
        printk("PCI: Probing PCI hardware\n");
 
        /* Scan all of the recorded PCI controllers.  */
-       list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
-               hose->last_busno = 0xff;
-               bus = pci_scan_bus(hose->first_busno, hose->ops,
-                                  hose->arch_data);
-               hose->bus = bus;
-               hose->last_busno = bus->subordinate;
-       }
+       list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+               scan_phb(hose);
 
 #ifndef CONFIG_PPC_ISERIES
        if (pci_probe_only)
@@ -820,120 +1096,89 @@ void phbs_remap_io(void)
 /*
  * ppc64 can have multifunction devices that do not respond to function 0.
  * In this case we must scan all functions.
+ * XXX this can go now, we use the OF device tree in all the
+ * cases that caused problems. -- paulus
  */
 int pcibios_scan_all_fns(struct pci_bus *bus, int devfn)
 {
-       struct device_node *busdn, *dn;
-
-       if (bus->self)
-               busdn = pci_device_to_OF_node(bus->self);
-       else
-               busdn = bus->sysdata;   /* must be a phb */
-
-       if (busdn == NULL)
-              return 0;
-
-       /*
-        * Check to see if there is any of the 8 functions are in the
-        * device tree.  If they are then we need to scan all the
-        * functions of this slot.
-        */
-       for (dn = busdn->child; dn; dn = dn->sibling) {
-              struct pci_dn *pdn = dn->data;
-              if (pdn && (pdn->devfn >> 3) == (devfn >> 3))
-                       return 1;
-       }
-
        return 0;
 }
 
+/*
+ * Apply the address offsets of the host bridge above @dev to @res.
+ *
+ * I/O resources are shifted by the distance of the hose's io_base_virt
+ * from pci_io_base; if the shifted range starts below MAX_ISA_PORT, the
+ * pages it covers (clamped at MAX_ISA_PORT) are also set in
+ * io_page_mask.  Memory resources are shifted by the hose's
+ * pci_mem_offset.  Resources of any other type are left untouched.
+ */
+static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev)
+{
+       struct pci_controller *hose = pci_bus_to_host(dev->bus);
+       unsigned long first, last, pages, io_offset;
+
+       if (!(res->flags & IORESOURCE_IO)) {
+               if (res->flags & IORESOURCE_MEM) {
+                       res->start += hose->pci_mem_offset;
+                       res->end += hose->pci_mem_offset;
+               }
+               return;
+       }
+
+       io_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+       res->start += io_offset;
+       res->end += io_offset;
+       first = res->start;
+       last = res->end;
+
+       /* Need to allow IO access to pages that are in the ISA range */
+       if (first >= MAX_ISA_PORT)
+               return;
+       if (last > MAX_ISA_PORT)
+               last = MAX_ISA_PORT;
+
+       /* get the range of pages for the map */
+       first >>= PAGE_SHIFT;
+       last >>= PAGE_SHIFT;
+       pages = ((1 << (last+1)) - 1) ^ ((1 << first) - 1);
+       io_page_mask |= pages;
+}
 
 void __devinit pcibios_fixup_device_resources(struct pci_dev *dev,
-                                          struct pci_bus *bus)
+                                             struct pci_bus *bus)
 {
        /* Update device resources.  */
-       struct pci_controller *hose = pci_bus_to_host(bus);
        int i;
 
-       for (i = 0; i < PCI_NUM_RESOURCES; i++) {
-               if (dev->resource[i].flags & IORESOURCE_IO) {
-                       unsigned long offset = (unsigned long)hose->io_base_virt
-                               - pci_io_base;
-                        unsigned long start, end, mask;
-
-                        start = dev->resource[i].start += offset;
-                        end = dev->resource[i].end += offset;
-
-                        /* Need to allow IO access to pages that are in the
-                           ISA range */
-                        if (start < MAX_ISA_PORT) {
-                                if (end > MAX_ISA_PORT)
-                                        end = MAX_ISA_PORT;
-
-                                start >>= PAGE_SHIFT;
-                                end >>= PAGE_SHIFT;
-
-                                /* get the range of pages for the map */
-                                mask = ((1 << (end+1))-1) ^ ((1 << start)-1);
-                                io_page_mask |= mask;
-                        }
-               }
-                else if (dev->resource[i].flags & IORESOURCE_MEM) {
-                       dev->resource[i].start += hose->pci_mem_offset;
-                       dev->resource[i].end += hose->pci_mem_offset;
-               }
-        }
+       for (i = 0; i < PCI_NUM_RESOURCES; i++)
+               if (dev->resource[i].flags)
+                       fixup_resource(&dev->resource[i], dev);
 }
 EXPORT_SYMBOL(pcibios_fixup_device_resources);
 
-void __devinit pcibios_fixup_bus(struct pci_bus *bus)
+static void __devinit do_bus_setup(struct pci_bus *bus)
 {
-       struct pci_controller *hose = pci_bus_to_host(bus);
-       struct pci_dev *dev = bus->self;
-       struct resource *res;
-       int i;
+       struct pci_dev *dev;
 
-       if (!dev) {
-               /* Root bus. */
+       ppc_md.iommu_bus_setup(bus);
 
-               hose->bus = bus;
-               bus->resource[0] = res = &hose->io_resource;
+       list_for_each_entry(dev, &bus->devices, bus_list)
+               ppc_md.iommu_dev_setup(dev);
 
-               if (res->flags && request_resource(&ioport_resource, res))
-                       printk(KERN_ERR "Failed to request IO on "
-                                       "PCI domain %d\n", pci_domain_nr(bus));
+       if (ppc_md.irq_bus_setup)
+               ppc_md.irq_bus_setup(bus);
+}
 
-               for (i = 0; i < 3; ++i) {
-                       res = &hose->mem_resources[i];
-                       bus->resource[i+1] = res;
-                       if (res->flags && request_resource(&iomem_resource, res))
-                               printk(KERN_ERR "Failed to request MEM on "
-                                               "PCI domain %d\n",
-                                               pci_domain_nr(bus));
-               }
-       } else if (pci_probe_only &&
-                  (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+void __devinit pcibios_fixup_bus(struct pci_bus *bus)
+{
+       struct pci_dev *dev = bus->self;
+
+       if (dev && pci_probe_only &&
+           (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
                /* This is a subordinate bridge */
 
                pci_read_bridge_bases(bus);
                pcibios_fixup_device_resources(dev, bus);
        }
 
-       ppc_md.iommu_bus_setup(bus);
-
-       list_for_each_entry(dev, &bus->devices, bus_list)
-               ppc_md.iommu_dev_setup(dev);
-
-       if (ppc_md.irq_bus_setup)
-               ppc_md.irq_bus_setup(bus);
+       do_bus_setup(bus);
 
        if (!pci_probe_only)
                return;
 
-       list_for_each_entry(dev, &bus->devices, bus_list) {
+       list_for_each_entry(dev, &bus->devices, bus_list)
                if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
                        pcibios_fixup_device_resources(dev, bus);
-       }
 }
 EXPORT_SYMBOL(pcibios_fixup_bus);
 
index e7f695d..325426c 100644 (file)
@@ -477,6 +477,18 @@ static int __init pmac_probe(int platform)
        return 1;
 }
 
+/*
+ * Decide how the devices below @bus are found on a powermac: normal
+ * probing for a root bus whose node is compatible with "u3-agp", the
+ * device tree for every other bus.
+ */
+static int pmac_probe_mode(struct pci_bus *bus)
+{
+       int is_agp_root;
+
+       /* We need to use normal PCI probing for the AGP bus,
+          since the device for the AGP bridge isn't in the tree. */
+       is_agp_root = !bus->self &&
+               device_is_compatible(bus->sysdata, "u3-agp");
+
+       return is_agp_root ? PCI_PROBE_NORMAL : PCI_PROBE_DEVTREE;
+}
+
 struct machdep_calls __initdata pmac_md = {
 #ifdef CONFIG_HOTPLUG_CPU
        .cpu_die                = generic_mach_cpu_die,
@@ -488,6 +500,7 @@ struct machdep_calls __initdata pmac_md = {
        .init_IRQ               = pmac_init_IRQ,
        .get_irq                = mpic_get_irq,
        .pcibios_fixup          = pmac_pcibios_fixup,
+       .pci_probe_mode         = pmac_probe_mode,
        .restart                = pmac_restart,
        .power_off              = pmac_power_off,
        .halt                   = pmac_halt,
index 9a1ef44..7b25738 100644 (file)
@@ -88,6 +88,7 @@ struct machdep_calls {
 
        /* PCI stuff */
        void            (*pcibios_fixup)(void);
+       int             (*pci_probe_mode)(struct pci_bus *);
 
        void            (*restart)(char *cmd);
        void            (*power_off)(void);
index 6b4a5b1..d899138 100644 (file)
@@ -119,5 +119,10 @@ static inline struct pci_controller *pci_bus_to_host(struct pci_bus *bus)
        return PCI_DN(busdn)->phb;
 }
 
+/*
+ * Return values for ppc_md.pci_probe_mode function, which is called
+ * for each PCI bridge (host or PCI-PCI) to choose how the devices
+ * under that bridge are instantiated.
+ */
+#define PCI_PROBE_NONE         -1      /* Don't look at this bus at all */
+#define PCI_PROBE_NORMAL       0       /* Do normal PCI probing */
+#define PCI_PROBE_DEVTREE      1       /* Instantiate from device tree */
+
 #endif
 #endif /* __KERNEL__ */