接前一篇文章:
上回书继续讲解virtio_pci_driver的probe回调函数virtio_pci_probe(),在讲到第5段代码的时候,
if (force_legacy) {
rc = virtio_pci_legacy_probe(vp_dev);
/* Also try modern mode if we can't map BAR0 (no IO space). */
if (rc == -ENODEV || rc == -ENOMEM)
rc = virtio_pci_modern_probe(vp_dev);
if (rc)
goto err_probe;
} else {
rc = virtio_pci_modern_probe(vp_dev);
if (rc == -ENODEV)
rc = virtio_pci_legacy_probe(vp_dev);
if (rc)
goto err_probe;
}
引出来两个函数:virtio_pci_legacy_probe和virtio_pci_modern_probe。本回就来对它们进行解析。当然,由于legacy已成过去,因此重点围绕virtio_pci_modern_probe函数进行解析,捎带手地也讲一下virtio_pci_legacy_probe()。为了便于理解,再次贴出两个函数的源码:
- virtio_pci_legacy_probe
virtio_pci_legacy_probe函数在Linux内核源码/drivers/virtio/virtio_pci_legacy.c中,代码如下:
/* the PCI probing function */
int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
{
struct virtio_pci_legacy_device *ldev = &vp_dev->ldev;
struct pci_dev *pci_dev = vp_dev->pci_dev;
int rc;
ldev->pci_dev = pci_dev;
rc = vp_legacy_probe(ldev);
if (rc)
return rc;
vp_dev->isr = ldev->isr;
vp_dev->vdev.id = ldev->id;
vp_dev->vdev.config = &virtio_pci_config_ops;
vp_dev->config_vector = vp_config_vector;
vp_dev->setup_vq = setup_vq;
vp_dev->del_vq = del_vq;
return 0;
}
- virtio_pci_modern_probe
virtio_pci_modern_probe函数在Linux内核源码/drivers/virtio/virtio_pci_modern.c中,代码如下:
/* the PCI probing function */
int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
{
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
struct pci_dev *pci_dev = vp_dev->pci_dev;
int err;
mdev->pci_dev = pci_dev;
err = vp_modern_probe(mdev);
if (err)
return err;
if (mdev->device)
vp_dev->vdev.config = &virtio_pci_config_ops;
else
vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
vp_dev->config_vector = vp_config_vector;
vp_dev->setup_vq = setup_vq;
vp_dev->del_vq = del_vq;
vp_dev->isr = mdev->isr;
vp_dev->vdev.id = mdev->id;
return 0;
}
virtio_pci_modern_probe函数中最主要的是调用了vp_modern_probe函数,其在Linux内核源码/drivers/virtio/virtio_pci_modern_dev.c中,代码如下:
/*
* vp_modern_probe: probe the modern virtio pci device, note that the
* caller is required to enable PCI device before calling this function.
* @mdev: the modern virtio-pci device
*
* Return 0 on succeed otherwise fail
*/
int vp_modern_probe(struct virtio_pci_modern_device *mdev)
{
struct pci_dev *pci_dev = mdev->pci_dev;
int err, common, isr, notify, device;
u32 notify_length;
u32 notify_offset;
check_offsets();
/* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
return -ENODEV;
if (pci_dev->device < 0x1040) {
/* Transitional devices: use the PCI subsystem device id as
* virtio device id, same as legacy driver always did.
*/
mdev->id.device = pci_dev->subsystem_device;
} else {
/* Modern devices: simply use PCI device id, but start from 0x1040. */
mdev->id.device = pci_dev->device - 0x1040;
}
mdev->id.vendor = pci_dev->subsystem_vendor;
/* check for a common config: if not, use legacy mode (bar 0). */
common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&mdev->modern_bars);
if (!common) {
dev_info(&pci_dev->dev,
"virtio_pci: leaving for legacy driver\n");
return -ENODEV;
}
/* If common is there, these should be too... */
isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&mdev->modern_bars);
notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&mdev->modern_bars);
if (!isr || !notify) {
dev_err(&pci_dev->dev,
"virtio_pci: missing capabilities %i/%i/%i\n",
common, isr, notify);
return -EINVAL;
}
err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
if (err)
err = dma_set_mask_and_coherent(&pci_dev->dev,
DMA_BIT_MASK(32));
if (err)
dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n");
/* Device capability is only mandatory for devices that have
* device-specific configuration.
*/
device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&mdev->modern_bars);
err = pci_request_selected_regions(pci_dev, mdev->modern_bars,
"virtio-pci-modern");
if (err)
return err;
err = -EINVAL;
mdev->common = vp_modern_map_capability(mdev, common,
sizeof(struct virtio_pci_common_cfg), 4,
0, sizeof(struct virtio_pci_common_cfg),
NULL, NULL);
if (!mdev->common)
goto err_map_common;
mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,
0, 1,
NULL, NULL);
if (!mdev->isr)
goto err_map_isr;
/* Read notify_off_multiplier from config space. */
pci_read_config_dword(pci_dev,
notify + offsetof(struct virtio_pci_notify_cap,
notify_off_multiplier),
&mdev->notify_offset_multiplier);
/* Read notify length and offset from config space. */
pci_read_config_dword(pci_dev,
notify + offsetof(struct virtio_pci_notify_cap,
cap.length),
&notify_length);
pci_read_config_dword(pci_dev,
notify + offsetof(struct virtio_pci_notify_cap,
cap.offset),
&notify_offset);
/* We don't know how many VQs we'll map, ahead of the time.
* If notify length is small, map it all now.
* Otherwise, map each VQ individually later.
*/
if ((u64)notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) {
mdev->notify_base = vp_modern_map_capability(mdev, notify,
2, 2,
0, notify_length,
&mdev->notify_len,
&mdev->notify_pa);
if (!mdev->notify_base)
goto err_map_notify;
} else {
mdev->notify_map_cap = notify;
}
/* Again, we don't know how much we should map, but PAGE_SIZE
* is more than enough for all existing devices.
*/
if (device) {
mdev->device = vp_modern_map_capability(mdev, device, 0, 4,
0, PAGE_SIZE,
&mdev->device_len,
NULL);
if (!mdev->device)
goto err_map_device;
}
return 0;
err_map_device:
if (mdev->notify_base)
pci_iounmap(pci_dev, mdev->notify_base);
err_map_notify:
pci_iounmap(pci_dev, mdev->isr);
err_map_isr:
pci_iounmap(pci_dev, mdev->common);
err_map_common:
pci_release_selected_regions(pci_dev, mdev->modern_bars);
return err;
}
EXPORT_SYMBOL_GPL(vp_modern_probe);
实际上,在老版本的KVM(即Linux内核)代码中,vp_modern_probe函数中的绝大多数内容是直接放在virtio_pci_modern_probe函数中的,后来才单独封装成了这样一个函数。
(1)vp_modern_probe首先设置了virtio设备的vendor ID和device ID。代码片段如下:
/* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
return -ENODEV;
if (pci_dev->device < 0x1040) {
/* Transitional devices: use the PCI subsystem device id as
* virtio device id, same as legacy driver always did.
*/
mdev->id.device = pci_dev->subsystem_device;
} else {
/* Modern devices: simply use PCI device id, but start from 0x1040. */
mdev->id.device = pci_dev->device - 0x1040;
}
mdev->id.vendor = pci_dev->subsystem_vendor;
值得注意的是,virtio PCI代理设备的device ID就是前文书(参见QEMU源码全解析 —— virtio(14))在讲virtio_pci_device_plugged函数(QEMU源码中)时设置的PCI_DEVICE_ID_VIRTIO_10_BASE+VIRTIO_ID_BALLOON,即0x1040+5。
所以,这里virtio设备的device ID(mdev->id.device)就是0x1040+5-0x1040=5,也就代表了VIRTIO_ID_BALLOON。
(2)接下来,调用多次virtio_pci_find_capability函数来发现virtio PCI代理设备的pci capability。代码片段如下:
/* check for a common config: if not, use legacy mode (bar 0). */
common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&mdev->modern_bars);
if (!common) {
dev_info(&pci_dev->dev,
"virtio_pci: leaving for legacy driver\n");
return -ENODEV;
}
/* If common is there, these should be too... */
isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&mdev->modern_bars);
notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&mdev->modern_bars);
if (!isr || !notify) {
dev_err(&pci_dev->dev,
"virtio_pci: missing capabilities %i/%i/%i\n",
common, isr, notify);
return -EINVAL;
}
err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
if (err)
err = dma_set_mask_and_coherent(&pci_dev->dev,
DMA_BIT_MASK(32));
if (err)
dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n");
/* Device capability is only mandatory for devices that have
* device-specific configuration.
*/
device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&mdev->modern_bars);
这些PCI capability也是在(QEMU源码)virtio_pci_device_plugged函数中写入到virtio PCI代理设备的配置空间中的,参见QEMU源码全解析 —— virtio(14)和QEMU源码全解析 —— virtio(15)。
(3)virtio_pci_find_capability函数找到capability所属的PCI BAR,然后写入到virtio_pci_modern_device(即mdev)的modern_bars成员中。代码片段如下:
/* Device capability is only mandatory for devices that have
* device-specific configuration.
*/
device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
IORESOURCE_IO | IORESOURCE_MEM,
&mdev->modern_bars);
从(QEMU源码)virtio_pci_realize函数中可以知道这个modern_bars是1<<4,如下图所示:
(4)接着,pci_request_selected_regions函数就将virtio PCI代理设备的BAR地址空间保留出来了。代码片段如下:
err = pci_request_selected_regions(pci_dev, mdev->modern_bars,
"virtio-pci-modern");
if (err)
return err;
(5)