PCIe在调试过程中,经常会出现扫描不到对端EP设备的问题,在问题定位过程中,了解内核中pcie枚举流程至关重要。
PCIe枚举过程一般分为三步: 1.创建根节点 2.扫描根节点下设备 3.为根节点下设备分配资源
从总线扫描pcie设备的函数pci_scan_child_bus开始分析
unsigned int pci_scan_child_bus(struct pci_bus *bus) { unsigned int devfn, pass, max = bus->busn_res.start; struct pci_dev *dev; dev_dbg(&bus->dev, "scanning bus\n"); /* Go find them, Rover! */ for (devfn = 0; devfn < 0x100; devfn += 8) pci_scan_slot(bus, devfn); /* Reserve buses for SR-IOV capability. */ max += pci_iov_bus_range(bus); /* * After performing arch-dependent fixup of the bus, look behind * all PCI-to-PCI bridges on this bus. */ if (!bus->is_added) { dev_dbg(&bus->dev, "fixups for bus\n"); pcibios_fixup_bus(bus); bus->is_added = 1; } for (pass = 0; pass < 2; pass++) list_for_each_entry(dev, &bus->devices, bus_list) { if (pci_is_bridge(dev)) max = pci_scan_bridge(bus, dev, max, pass); } /* * Make sure a hotplug bridge has at least the minimum requested * number of buses. */ if (bus->self && bus->self->is_hotplug_bridge && pci_hotplug_bus_size) { if (max - bus->busn_res.start < pci_hotplug_bus_size - 1) max = bus->busn_res.start + pci_hotplug_bus_size - 1; } /* * We've scanned the bus and so we know all about what's on * the other side of any bridges that may be on this bus plus * any devices. * * Return how far we've got finding sub-buses. */ dev_dbg(&bus->dev, "bus scan returning with max=%02x\n", max); return max; } EXPORT_SYMBOL_GPL(pci_scan_child_bus);该函数的核心代码为
for (devfn = 0; devfn < 0x100; devfn += 8) pci_scan_slot(bus, devfn);这里的bus变量来源于pci_create_root_bus,也就是创建的根总线的总线号 devfn : 设备和功能号。 这里使用的是穷举法,把所有的dev和function都尝试一次。
pci_scan_slot函数:
int pci_scan_slot(struct pci_bus *bus, int devfn) { unsigned fn, nr = 0; struct pci_dev *dev; if (only_one_child(bus) && (devfn > 0)) return 0; /* Already scanned the entire slot */ dev = pci_scan_single_device(bus, devfn); if (!dev) return 0; if (!dev->is_added) nr++; for (fn = next_fn(bus, dev, 0); fn > 0; fn = next_fn(bus, dev, fn)) { dev = pci_scan_single_device(bus, devfn + fn); if (dev) { if (!dev->is_added) nr++; dev->multifunction = 1; } } /* only one slot has pcie device */ if (bus->self && nr) pcie_aspm_init_link_state(bus->self); return nr; }pci_scan_single_device:
struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn) { struct pci_dev *dev; dev = pci_get_slot(bus, devfn); if (dev) { pci_dev_put(dev); return dev; } dev = pci_scan_device(bus, devfn); if (!dev) return NULL; pci_device_add(dev, bus); return dev; }核心函数为pci_scan_device:
struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn) { struct pci_dev *dev; dev = pci_get_slot(bus, devfn); if (dev) { pci_dev_put(dev); return dev; } dev = pci_scan_device(bus, devfn); if (!dev) return NULL; pci_device_add(dev, bus); return dev; }pci_bus_read_dev_vendor_id会去读设备的devid和venderid,如果读不到,说明设备不存在了,如果读到了,就会创建一个pdev。 所以能不能扫描对端设备,就看能不能读到设备的vendorid。
bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l, int crs_timeout) { int delay = 1; if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)) return false; /* some broken boards return 0 or ~0 if a slot is empty: */ if (*l == 0xffffffff || *l == 0x00000000 || *l == 0x0000ffff || *l == 0xffff0000) return false; /* * Configuration Request Retry Status. Some root ports return the * actual device ID instead of the synthetic ID (0xFFFF) required * by the PCIe spec. Ignore the device ID and only check for * (vendor id == 1). */ while ((*l & 0xffff) == 0x0001) { if (!crs_timeout) return false; msleep(delay); delay *= 2; if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l)) return false; /* Card hasn't responded in 60 seconds? Must be stuck. */ if (delay > crs_timeout) { printk(KERN_WARNING "pci %04x:%02x:%02x.%d: not responding\n", pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); return false; } } return true; }while循环里每隔一段时间就会进行一次venderid的配置读写,从上述代码中可以看出,退出循环的原因会有两个 1. config配置读写失败了 (说明链路不通) 2. 超时没有得到响应(预留时间为60s,已经非常长了,第一点失败的可能性更大)
所以扫描不到对端设备时: 1.确认建链是否成功,建链失败,肯定扫描不到对端 2.确认对端的配置空间是否可写(对端的pcie模块是否处于解复位状态) 3.确认type0,type1,iatu等参数配置是否正确,如果正确了,确认配置访问的地址空间大小是否足够。