PCI/PCIE Capability
PCI/PCIE设备的配置空间记录了PCIE设备的capability支持信息,每个capability定义了一个ID标识,可以通过函数pci_find_capability和pci_find_ext_capability来探测和获取这些配置信息的位置。这些ID定义在文件include/uapi/linux/pci_regs.h中:
以AER为例,其capability ID定义为PCI_EXT_CAP_ID_ERR,在小米笔记本上,通过探测只有设备0000:02:00.0和0000:00:1c.4支持AER,而这两个设备恰好是互相连接的,保证AER的传输路径是通的:
再举一个例子,PCI_EXT_CAP_ID_REBAR CAP表示PCIE设备对resize bar的支持程度,在支持RESIZE BAR的平台上,可以允许CPU直接访问显存。
如何判断设备是否为PCIE设备
PCI/PCIE的配置空间布局如下图:
1.PCI设备配置空间有256B大小,其中前64B是配置空间头,里面定义了vendor id, device id,bar空间等等信息。
2.PCIe配置空间大小有4K,包含PCI的256B,如果设备是PCIE设备,则Legacy的PCI配置空间(64B-256B)中会包含一个PCI Express Capability structure 描述结构,然后从偏移0x100开始,全部是PCIE的configuration capability and parameter信息。如同下图中的黄色区域的定义。
比如在我的小米电脑中,NVIDIA MX250设备中,其PCIE Capability Structure就在0x78的位置。
如何判断一个PCI设备是否是PCIE设备,就可以根据判断256B的Legacy空间中是否有图中黄色字段的定义。可以查看配置空间中是否存在PCI_CAP_ID_EXP能力,对应图中的黄色字段,有定义PCI_CAP_ID_EXP就属于PCIE设备,其/sys/devices/pci0000:00/0000:00:1c.4/config文件有4K个字节,比PCI设备(256字节)要多。关于PCIE设备,需要注意的是,如果终端EP是PCIE设备,则链路上所有的设备也必须是PCIE设备,才能发挥出最大带宽性能,比如下图,0000:02:00.0是N卡PCIE设备,它的上游设备0000:00:1c.4也是PCIE设备。
枚举配置空间CAP列表
根据前面分析的PCIE配置空间的布局格式,可以写一个解析PCIE配置空间的程序如下:
#include <stdlib.h>
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
// Allow "gcc -DBUF_SIZE=xxx" to override the read-buffer size at build time.
#ifndef BUF_SIZE
#define BUF_SIZE 1024
#endif
// Size in bytes of a full PCIe configuration space (4 KiB).
#define CONFIG_SZ_PCIE 4096
// Size in bytes of a conventional PCI configuration space.
#define CONFIG_SZ_PCI 256
/*
 * DBG - printf-style trace line prefixed with the calling function name and
 * source line. Relies on the GNU `##__VA_ARGS__` extension so that the
 * trailing comma disappears when no variadic arguments are given.
 */
#define DBG(fmt, ...) do { printf("%s line %d, "fmt, __func__, __LINE__, ##__VA_ARGS__); } while (0)

/*
 * assert - local replacement for the <assert.h> macro.
 *
 * On failure it prints the failed expression with file/function/line and
 * then spins forever instead of aborting, so the process stays around for
 * inspection.
 *
 * The body is wrapped in do { ... } while (0) so the macro behaves like a
 * single statement: `if (x) assert(y); else ...` compiles and the `else`
 * binds to the outer `if`.
 */
#define assert(expr) \
    do { \
        if (!(expr)) { \
            printf("Assertion failed! %s,%s,%s,line=%d\n", \
                   #expr, __FILE__, __func__, __LINE__); \
            while (1) \
                ; \
        } \
    } while (0)
/*
 * One row of the register/capability name table (pci_reg_names).
 */
struct reg_name {
/* 0 for plain header registers; capability rows store 0x10000 + ID (legacy)
 * or 0x20000 + ID (PCIe extended). */
unsigned int cap;
/* Byte offset of the register inside config space; 0 on capability rows. */
unsigned int offset;
/* Presumably the register width in bytes (the table uses 1, 2 or 4);
 * 0 on capability rows. */
unsigned int width;
/* NOTE(review): looks like a bitmask of the header types the register
 * applies to; no code in this file reads it - confirm before relying on it. */
unsigned int hdr_type_mask;
/* Printable name; NULL marks the terminating row of the table. */
const char *name;
};
/* Raw copy of the device's configuration space; sized for the larger
 * (CONFIG_SZ_PCIE) layout so either kind of device fits. */
static unsigned char pcie_config[CONFIG_SZ_PCIE];
/*
 * Hex-dump `len` bytes of `buf` to stdout, sixteen bytes per row.
 * Every row starts with the offset of its first byte; a newline is emitted
 * only after a row has received all sixteen bytes.
 */
void dump_memory(unsigned char *buf, int len)
{
    int pos = 0;

    while (pos < len) {
        const int column = pos % 16;

        if (column == 0)
            printf("0x%03x: ", pos);

        printf("0x%02x ", buf[pos]);

        if (column == 15)
            printf("\n");

        pos++;
    }
}
// Legacy PCI capabilities: the linked list kept in the first 256 bytes of
// config space. PCI_CAP_ID_* are capability ID values; PCI_CAP_LIST_ID,
// PCI_CAP_LIST_NEXT and PCI_CAP_FLAGS are byte offsets inside one entry.
// Names follow Linux include/uapi/linux/pci_regs.h.
#define PCI_CAP_LIST_ID 0 /* Capability ID */
#define PCI_CAP_ID_PM 0x01 /* Power Management */
#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */
#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */
#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */
#define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */
#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */
#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */
#define PCI_CAP_ID_HT 0x08 /* HyperTransport */
#define PCI_CAP_ID_VNDR 0x09 /* Vendor-Specific */
#define PCI_CAP_ID_DBG 0x0A /* Debug port */
#define PCI_CAP_ID_CCRC 0x0B /* CompactPCI Central Resource Control */
#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */
#define PCI_CAP_ID_SSVID 0x0D /* Bridge subsystem vendor/device ID */
#define PCI_CAP_ID_AGP3 0x0E /* AGP Target PCI-PCI bridge */
#define PCI_CAP_ID_SECDEV 0x0F /* Secure Device */
#define PCI_CAP_ID_EXP 0x10 /* PCI Express */
#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */
#define PCI_CAP_ID_SATA 0x12 /* SATA Data/Index Conf. */
#define PCI_CAP_ID_AF 0x13 /* PCI Advanced Features */
#define PCI_CAP_ID_EA 0x14 /* PCI Enhanced Allocation */
#define PCI_CAP_ID_MAX PCI_CAP_ID_EA
#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */
#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */
#define PCI_CAP_SIZEOF 4
// PCIe extended capability IDs (the list that main() walks starting at
// config-space offset 0x100). Names follow Linux include/uapi/linux/pci_regs.h.
// Note: IDs 0x1C and 0x20-0x24 have no macro here, and PCI_EXT_CAP_ID_MAX
// stops at 0x26, although pci_reg_names carries rows beyond that.
#define PCI_EXT_CAP_ID_ERR 0x01 /* Advanced Error Reporting */
#define PCI_EXT_CAP_ID_VC 0x02 /* Virtual Channel Capability */
#define PCI_EXT_CAP_ID_DSN 0x03 /* Device Serial Number */
#define PCI_EXT_CAP_ID_PWR 0x04 /* Power Budgeting */
#define PCI_EXT_CAP_ID_RCLD 0x05 /* Root Complex Link Declaration */
#define PCI_EXT_CAP_ID_RCILC 0x06 /* Root Complex Internal Link Control */
#define PCI_EXT_CAP_ID_RCEC 0x07 /* Root Complex Event Collector */
#define PCI_EXT_CAP_ID_MFVC 0x08 /* Multi-Function VC Capability */
#define PCI_EXT_CAP_ID_VC9 0x09 /* same as _VC */
#define PCI_EXT_CAP_ID_RCRB 0x0A /* Root Complex RB? */
#define PCI_EXT_CAP_ID_VNDR 0x0B /* Vendor-Specific */
#define PCI_EXT_CAP_ID_CAC 0x0C /* Config Access - obsolete */
#define PCI_EXT_CAP_ID_ACS 0x0D /* Access Control Services */
#define PCI_EXT_CAP_ID_ARI 0x0E /* Alternate Routing ID */
#define PCI_EXT_CAP_ID_ATS 0x0F /* Address Translation Services */
#define PCI_EXT_CAP_ID_SRIOV 0x10 /* Single Root I/O Virtualization */
#define PCI_EXT_CAP_ID_MRIOV 0x11 /* Multi Root I/O Virtualization */
#define PCI_EXT_CAP_ID_MCAST 0x12 /* Multicast */
#define PCI_EXT_CAP_ID_PRI 0x13 /* Page Request Interface */
#define PCI_EXT_CAP_ID_AMD_XXX 0x14 /* Reserved for AMD */
#define PCI_EXT_CAP_ID_REBAR 0x15 /* Resizable BAR */
#define PCI_EXT_CAP_ID_DPA 0x16 /* Dynamic Power Allocation */
#define PCI_EXT_CAP_ID_TPH 0x17 /* TPH Requester */
#define PCI_EXT_CAP_ID_LTR 0x18 /* Latency Tolerance Reporting */
#define PCI_EXT_CAP_ID_SECPCI 0x19 /* Secondary PCIe Capability */
#define PCI_EXT_CAP_ID_PMUX 0x1A /* Protocol Multiplexing */
#define PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */
#define PCI_EXT_CAP_ID_DPC 0x1D /* Downstream Port Containment */
#define PCI_EXT_CAP_ID_L1SS 0x1E /* L1 PM Substates */
#define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */
#define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */
#define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */
#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_16GT
/*
 * Name table for config-space registers and capabilities.
 *
 * Row layout: { cap, offset, width, hdr_type_mask, name }.
 *   - cap == 0:        a fixed header register at `offset`.
 *   - cap == 0x100NN:  legacy PCI capability with ID 0xNN.
 *   - cap == 0x200NN:  PCIe extended capability with ID 0xNN.
 * The 0x10000/0x20000 tags only keep the two ID spaces apart inside one
 * table. The table ends with a row whose name is NULL.
 *
 * NOTE(review): the same offset appears several times with different
 * hdr_type_mask values (0x1/0x2/0x4), which suggests one bit per header
 * layout (normal / bridge / CardBus); nothing in this file reads the mask,
 * so confirm before depending on it.
 */
static const struct reg_name pci_reg_names[] = {
/* Header registers. */
{ 0, 0x00, 2, 0x0, "VENDOR_ID" },
{ 0, 0x02, 2, 0x0, "DEVICE_ID" },
{ 0, 0x04, 2, 0x0, "COMMAND" },
{ 0, 0x06, 2, 0x0, "STATUS" },
{ 0, 0x08, 1, 0x0, "REVISION" },
{ 0, 0x09, 1, 0x0, "CLASS_PROG" },
{ 0, 0x0a, 2, 0x0, "CLASS_DEVICE" },
{ 0, 0x0c, 1, 0x0, "CACHE_LINE_SIZE" },
{ 0, 0x0d, 1, 0x0, "LATENCY_TIMER" },
{ 0, 0x0e, 1, 0x0, "HEADER_TYPE" },
{ 0, 0x0f, 1, 0x0, "BIST" },
{ 0, 0x10, 4, 0x3, "BASE_ADDRESS_0" },
{ 0, 0x14, 4, 0x3, "BASE_ADDRESS_1" },
{ 0, 0x18, 4, 0x1, "BASE_ADDRESS_2" },
{ 0, 0x1c, 4, 0x1, "BASE_ADDRESS_3" },
{ 0, 0x20, 4, 0x1, "BASE_ADDRESS_4" },
{ 0, 0x24, 4, 0x1, "BASE_ADDRESS_5" },
{ 0, 0x28, 4, 0x1, "CARDBUS_CIS" },
{ 0, 0x2c, 2, 0x1, "SUBSYSTEM_VENDOR_ID" },
{ 0, 0x2e, 2, 0x1, "SUBSYSTEM_ID" },
{ 0, 0x30, 4, 0x1, "ROM_ADDRESS" },
{ 0, 0x34, 1, 0x3, "CAPABILITIES" },
{ 0, 0x3c, 1, 0x3, "INTERRUPT_LINE" },
{ 0, 0x3d, 1, 0x3, "INTERRUPT_PIN" },
{ 0, 0x3e, 1, 0x1, "MIN_GNT" },
{ 0, 0x3f, 1, 0x1, "MAX_LAT" },
{ 0, 0x18, 1, 0x2, "PRIMARY_BUS" },
{ 0, 0x19, 1, 0x2, "SECONDARY_BUS" },
{ 0, 0x1a, 1, 0x2, "SUBORDINATE_BUS" },
{ 0, 0x1b, 1, 0x2, "SEC_LATENCY_TIMER" },
{ 0, 0x1c, 1, 0x2, "IO_BASE" },
{ 0, 0x1d, 1, 0x2, "IO_LIMIT" },
{ 0, 0x1e, 2, 0x2, "SEC_STATUS" },
{ 0, 0x20, 2, 0x2, "MEMORY_BASE" },
{ 0, 0x22, 2, 0x2, "MEMORY_LIMIT" },
{ 0, 0x24, 2, 0x2, "PREF_MEMORY_BASE" },
{ 0, 0x26, 2, 0x2, "PREF_MEMORY_LIMIT" },
{ 0, 0x28, 4, 0x2, "PREF_BASE_UPPER32" },
{ 0, 0x2c, 4, 0x2, "PREF_LIMIT_UPPER32" },
{ 0, 0x30, 2, 0x2, "IO_BASE_UPPER16" },
{ 0, 0x32, 2, 0x2, "IO_LIMIT_UPPER16" },
{ 0, 0x38, 4, 0x2, "BRIDGE_ROM_ADDRESS" },
{ 0, 0x3e, 2, 0x2, "BRIDGE_CONTROL" },
{ 0, 0x10, 4, 0x4, "CB_CARDBUS_BASE" },
{ 0, 0x14, 2, 0x4, "CB_CAPABILITIES" },
{ 0, 0x16, 2, 0x4, "CB_SEC_STATUS" },
{ 0, 0x18, 1, 0x4, "CB_BUS_NUMBER" },
{ 0, 0x19, 1, 0x4, "CB_CARDBUS_NUMBER" },
{ 0, 0x1a, 1, 0x4, "CB_SUBORDINATE_BUS" },
{ 0, 0x1b, 1, 0x4, "CB_CARDBUS_LATENCY" },
{ 0, 0x1c, 4, 0x4, "CB_MEMORY_BASE_0" },
{ 0, 0x20, 4, 0x4, "CB_MEMORY_LIMIT_0" },
{ 0, 0x24, 4, 0x4, "CB_MEMORY_BASE_1" },
{ 0, 0x28, 4, 0x4, "CB_MEMORY_LIMIT_1" },
{ 0, 0x2c, 2, 0x4, "CB_IO_BASE_0" },
{ 0, 0x2e, 2, 0x4, "CB_IO_BASE_0_HI" },
{ 0, 0x30, 2, 0x4, "CB_IO_LIMIT_0" },
{ 0, 0x32, 2, 0x4, "CB_IO_LIMIT_0_HI" },
{ 0, 0x34, 2, 0x4, "CB_IO_BASE_1" },
{ 0, 0x36, 2, 0x4, "CB_IO_BASE_1_HI" },
{ 0, 0x38, 2, 0x4, "CB_IO_LIMIT_1" },
{ 0, 0x3a, 2, 0x4, "CB_IO_LIMIT_1_HI" },
{ 0, 0x40, 2, 0x4, "CB_SUBSYSTEM_VENDOR_ID" },
{ 0, 0x42, 2, 0x4, "CB_SUBSYSTEM_ID" },
{ 0, 0x44, 4, 0x4, "CB_LEGACY_MODE_BASE" },
/* Legacy PCI capabilities: cap = 0x10000 + capability ID. */
{ 0x10001, 0, 0, 0x0, "CAP_PM" },
{ 0x10002, 0, 0, 0x0, "CAP_AGP" },
{ 0x10003, 0, 0, 0x0, "CAP_VPD" },
{ 0x10004, 0, 0, 0x0, "CAP_SLOTID" },
{ 0x10005, 0, 0, 0x0, "CAP_MSI" },
{ 0x10006, 0, 0, 0x0, "CAP_CHSWP" },
{ 0x10007, 0, 0, 0x0, "CAP_PCIX" },
{ 0x10008, 0, 0, 0x0, "CAP_HT" },
{ 0x10009, 0, 0, 0x0, "CAP_VNDR" },
{ 0x1000a, 0, 0, 0x0, "CAP_DBG" },
{ 0x1000b, 0, 0, 0x0, "CAP_CCRC" },
{ 0x1000c, 0, 0, 0x0, "CAP_HOTPLUG" },
{ 0x1000d, 0, 0, 0x0, "CAP_SSVID" },
{ 0x1000e, 0, 0, 0x0, "CAP_AGP3" },
{ 0x1000f, 0, 0, 0x0, "CAP_SECURE" },
{ 0x10010, 0, 0, 0x0, "CAP_EXP" },
{ 0x10011, 0, 0, 0x0, "CAP_MSIX" },
{ 0x10012, 0, 0, 0x0, "CAP_SATA" },
{ 0x10013, 0, 0, 0x0, "CAP_AF" },
{ 0x10014, 0, 0, 0x0, "CAP_EA" },
/* PCIe extended capabilities: cap = 0x20000 + extended capability ID. */
{ 0x20001, 0, 0, 0x0, "ECAP_AER" },
{ 0x20002, 0, 0, 0x0, "ECAP_VC" },
{ 0x20003, 0, 0, 0x0, "ECAP_DSN" },
{ 0x20004, 0, 0, 0x0, "ECAP_PB" },
{ 0x20005, 0, 0, 0x0, "ECAP_RCLINK" },
{ 0x20006, 0, 0, 0x0, "ECAP_RCILINK" },
{ 0x20007, 0, 0, 0x0, "ECAP_RCEC" },
{ 0x20008, 0, 0, 0x0, "ECAP_MFVC" },
{ 0x20009, 0, 0, 0x0, "ECAP_VC2" },
{ 0x2000a, 0, 0, 0x0, "ECAP_RBCB" },
{ 0x2000b, 0, 0, 0x0, "ECAP_VNDR" },
{ 0x2000d, 0, 0, 0x0, "ECAP_ACS" },
{ 0x2000e, 0, 0, 0x0, "ECAP_ARI" },
{ 0x2000f, 0, 0, 0x0, "ECAP_ATS" },
{ 0x20010, 0, 0, 0x0, "ECAP_SRIOV" },
{ 0x20011, 0, 0, 0x0, "ECAP_MRIOV" },
{ 0x20012, 0, 0, 0x0, "ECAP_MCAST" },
{ 0x20013, 0, 0, 0x0, "ECAP_PRI" },
{ 0x20015, 0, 0, 0x0, "ECAP_REBAR" },
{ 0x20016, 0, 0, 0x0, "ECAP_DPA" },
{ 0x20017, 0, 0, 0x0, "ECAP_TPH" },
{ 0x20018, 0, 0, 0x0, "ECAP_LTR" },
{ 0x20019, 0, 0, 0x0, "ECAP_SECPCI" },
{ 0x2001a, 0, 0, 0x0, "ECAP_PMUX" },
{ 0x2001b, 0, 0, 0x0, "ECAP_PASID" },
{ 0x2001c, 0, 0, 0x0, "ECAP_LNR" },
{ 0x2001d, 0, 0, 0x0, "ECAP_DPC" },
{ 0x2001e, 0, 0, 0x0, "ECAP_L1PM" },
{ 0x2001f, 0, 0, 0x0, "ECAP_PTM" },
{ 0x20020, 0, 0, 0x0, "ECAP_M_PCIE" },
{ 0x20021, 0, 0, 0x0, "ECAP_FRS" },
{ 0x20022, 0, 0, 0x0, "ECAP_RTR" },
{ 0x20023, 0, 0, 0x0, "ECAP_DVSEC" },
{ 0x20024, 0, 0, 0x0, "ECAP_VF_REBAR" },
{ 0x20025, 0, 0, 0x0, "ECAP_DLNK" },
{ 0x20026, 0, 0, 0x0, "ECAP_16GT" },
{ 0x20027, 0, 0, 0x0, "ECAP_LMR" },
{ 0x20028, 0, 0, 0x0, "ECAP_HIER_ID" },
{ 0x20029, 0, 0, 0x0, "ECAP_NPEM" },
{ 0x20030, 0, 0, 0x0, "ECAP_IDE" },
/* Terminator: name == NULL ends every table scan. */
{ 0, 0, 0, 0x0, NULL }
};
/*
 * Find a row of pci_reg_names by its name, ignoring case.
 *
 * name: register or capability name to look for (e.g. "vendor_id").
 *       The string is only read, so it is taken as const; a NULL name
 *       matches nothing.
 *
 * Returns a pointer to the matching table row, or NULL when no row has
 * that name.
 */
static const struct reg_name *parse_reg_name(const char *name)
{
    const struct reg_name *r;

    /* strcasecmp() on a NULL pointer is undefined behaviour. */
    if (name == NULL)
        return NULL;

    /* The table is terminated by a row whose name is NULL. */
    for (r = pci_reg_names; r->name != NULL; r++) {
        if (strcasecmp(r->name, name) == 0)
            return r;
    }
    return NULL;
}
/*
 * Translate a capability ID into its printable name from pci_reg_names.
 *
 * Table rows carry a tag in the upper bits of ->cap so that the two ID
 * spaces can share one table: 0x10000 + ID for legacy PCI capabilities and
 * 0x20000 + ID for PCIe extended capabilities.
 *
 * cap_id:  raw capability ID read from config space.
 * pci_ext: non-zero to look the ID up among the extended capabilities,
 *          zero to look it up among the legacy ones.
 *
 * Returns the table name, or "UNKNOWN" when the table has no row for the
 * ID. The result is never NULL, so it can be handed straight to a "%s"
 * conversion. (Previously an unknown ID ran off the end of the function
 * without returning a value.)
 */
static const char *parse_reg_capid(unsigned short cap_id, int pci_ext)
{
    const unsigned int wanted = (pci_ext ? 0x20000u : 0x10000u) + cap_id;
    const struct reg_name *r;

    /* The table is terminated by a row whose name is NULL. */
    for (r = pci_reg_names; r->name != NULL; r++) {
        if (r->cap == wanted)
            return r->name;
    }
    return "UNKNOWN";
}
int main(int argc, char **argv)
{
int inputfd;
mode_t fileperms;
ssize_t num_read;
char buf[BUF_SIZE];
char filename[256];
int read_sz;
int domain, bus, slot, func;
if ((argc != 2) || (strcmp(argv[1], "--help") == 0)) {
printf("%s BDF\n", argv[0]);
exit(0);
}
sscanf(argv[1], "%d:%d:%d.%d", &domain, &bus, &slot, &func);
memset(filename, 0x00, 256);
snprintf(filename, 99, "/sys/bus/pci/devices/%04x:%02x:%02x.%1x/config",
domain, bus, slot, func);
DBG("domain:%d, bus:%d, slot:%d, func:%d.\n", domain, bus, slot, func);
inputfd = open(filename, O_RDONLY);
if (inputfd == -1) {
DBG("opening file %s error.\n", argv[1]);
exit(-1);
}
read_sz = 0;
while ((num_read = read(inputfd, buf, BUF_SIZE)) > 0) {
memcpy(pcie_config + read_sz, buf, num_read);
read_sz += num_read;
}
if ((read_sz != CONFIG_SZ_PCIE) && (read_sz != CONFIG_SZ_PCI)) {
DBG("config space not 4K/256B, it is not a valid pcie/pci device, sz 0x%x.\n",
read_sz);
goto end;
}
dump_memory(pcie_config, read_sz);
// status bit4 means capabilities exists.
// Support Capability List, corrspond PCI_STATUS_CAP_LIST in linux kernel.
if (!(pcie_config[6] & 0x10)) {
printf("Not Support Capabilities List.\n");
return -1;
}
// legacy pci capabilities.
int chain = 0x34;
while (pcie_config[chain]) {
int cap;
cap = pcie_config[pcie_config[chain]];
DBG("PCI Capility ID:0x%02x, offset 0x%03x, name %s.\n",
cap, pcie_config[chain], parse_reg_capid(cap, 0));
chain = pcie_config[chain] + 1;
}
// pcie extension capabilities.
if (read_sz == CONFIG_SZ_PCIE) {
chain = 0x100;
do {
int ver, next, cap;
cap = pcie_config[chain + 1] << 8 | pcie_config[chain + 0];
ver = pcie_config[chain + 2] & 0x0f;
next = (pcie_config[chain + 3] << 4) | ((pcie_config[chain + 2] & 0xf0) >> 4);
if (next) {
DBG("PCI Extension Capility ID:0x%04x, next 0x%03x ver %d, name %s.\n",
cap, next, ver, parse_reg_capid(cap, 1));
}
chain = next;
} while (chain);
}
end:
if (close(inputfd) == -1) {
DBG("fatal error, close input file %s failure.\n", argv[1]);
return -1;
}
return 0;
}
用上面的程序分析电脑上的MX250独立显卡的CAP 信息如下:
根据输出可以看到,MX250独显的LEGACY CAP有:CAP_PM(电源管理)、CAP_MSI(中断)、CAP_EXP(PCI Express)。PCI Extension Capability包含:ECAP_VC、ECAP_LTR、ECAP_L1PM、ECAP_PB、ECAP_AER、ECAP_VNDR。
下图是AMD显卡的Capability信息,可以看到,其支持更多的特性,比如SRIOV,ECAP_REBAR(Resize Bar)等功能。
PCIE CAP ID分配规范
CAP ID由PCIE标准组织统一管理,当前分配情况可以参考如下文档:
https://pcisig.com/sites/default/files/files/PCI_Code-ID_r_1_11__v24_Jan_2019.pdf
Legacy CAP ID分配,在配置空间的[0x40-0xFF]区间:
扩展CAP ID分配(位于配置空间256B-4KB区域):