基于MDEV的PCI设备虚拟化DEMO实现

利用周末时间做了一个MDEV虚拟化PCI设备的小试验,简单记录一下:

DEMO架构,此图参考了内核文档:Documentation/driver-api/vfio-mediated-device.rst

host kernel watchdog pci driver:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/cdev.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/uuid.h>
#include <linux/vfio.h>
#include <linux/iommu.h>
#include <linux/sysfs.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/mdev.h>
#include <linux/pci.h>

/* Size in bytes of the emulated BAR0 window (backing store: wdg_mdev_state.iobase). */
#define IO_BAR0_SIZE 32
/* Size in bytes of the emulated PCI configuration space (wdg_mdev_state.config). */
#define IO_CONF_SIZE 0x100
/* Vendor/device IDs written at config space offset 0 of every mdev instance. */
#define CZL_WDG_DEVICE_VENDOR_ID 0xbeef
#define CZL_WDG_DEVICE_DEVICE_ID 0x1001
/*
 * Debug print helper: prefixes the message with the calling function name and
 * source line. Wrapped in do { } while (0) so it behaves as one statement.
 */
#define API_DBG(fmt, ...) do { \
                printk("%s line %d, "fmt, __func__, __LINE__, ##__VA_ARGS__); \
        } while (0)

/*
 * Module-wide state of the mdev parent device. A single static instance
 * (czl_wdg) is filled in by mdev_wdg_init() and torn down by mdev_wdg_exit().
 */
struct czl_wdg_dev {
	/* first dev_t of the char device region from alloc_chrdev_region() */
	dev_t         wdg_devt;
	/* class created with class_create(); also the class of @dev */
	struct class *wdg_class;
	/* char device registered with czl_wdg_fops */
	struct cdev   wdg_cdev;
	/* device registered as the mdev parent via mdev_register_device() */
	struct device dev;
};

/* Per-region bookkeeping kept for each VFIO PCI region of one mdev instance. */
struct mdev_region_info {
	/* BAR base address decoded from config space by mdev_read_base() */
	u64 start;
	/* not written or read anywhere in this file */
	u64 phys_start;
	/* region size in bytes, recorded by wdg_get_region_info() */
	u32 size;
	/* file offset of the region, i.e. WDG_VFIO_PCI_INDEX_TO_OFFSET(index) */
	u64 vfio_offset;
};

/*
 * State of one emulated watchdog (one mdev instance). Allocated in
 * czl_wdg_create(), stored as the mdev's drvdata and freed in czl_wdg_remove().
 */
struct wdg_mdev_state {
	/* emulated PCI config space, IO_CONF_SIZE bytes */
	u8 *config;
	/* backing store for BAR0 accesses, IO_BAR0_SIZE bytes */
	u8 *iobase;
	/* the mdev device this state belongs to */
	struct mdev_device *mdev;
	/* per-region data, indexed by VFIO region index */
	struct mdev_region_info region_info[VFIO_PCI_NUM_REGIONS];
	/* BAR sizing masks; only entry 0 is set (wdg_create_config_space()) */
	u32 bar_mask[VFIO_PCI_NUM_REGIONS];
	/* link in wdg_mdev_devices_list */
	struct list_head next;
	/* copy of the last reply to VFIO_DEVICE_GET_INFO */
	struct vfio_device_info dev_info;
	/* 1-based number of the mdev type this instance was created from */
	int index;
	/* serialises region accesses and region-info updates */
	struct mutex ops_lock;
};

/*
 * File operations of the parent char device. Only .owner is set; no
 * open/read/write handlers are provided for it.
 */
static const struct file_operations czl_wdg_fops = {
	.owner          = THIS_MODULE,
};

/*
 * Lock and list tracking every live mdev instance (linked through
 * wdg_mdev_state.next). Both are initialised statically so they are valid
 * from module load on, including the moment the mdev sysfs attributes and
 * the create callback become reachable.
 */
static DEFINE_MUTEX(wdg_mdev_list_lock);
static LIST_HEAD(wdg_mdev_devices_list);
/*
 * A VFIO file offset encodes the region index in the bits at and above
 * WDG_VFIO_PCI_OFFSET_SHIFT and the offset inside the region in the bits
 * below it.
 */
#define WDG_VFIO_PCI_OFFSET_SHIFT   (40)
/* Region index of a file offset; the argument is parenthesised so that
 * expressions such as (a | b) are shifted as a whole. */
#define WDG_VFIO_PCI_OFFSET_TO_INDEX(off)   ((off) >> WDG_VFIO_PCI_OFFSET_SHIFT)
/* File offset at which region @index starts. */
#define WDG_VFIO_PCI_INDEX_TO_OFFSET(index) \
                                        ((u64)(index) << WDG_VFIO_PCI_OFFSET_SHIFT)
/* Mask selecting the offset-inside-region part of a file offset. */
#define WDG_VFIO_PCI_OFFSET_MASK    \
                                (((u64)(1) << WDG_VFIO_PCI_OFFSET_SHIFT) - 1)
/* Upper bound used when reporting available mdev instances. */
#define MAX_WDGS                    (16)
/* The single parent-device instance of this module (see mdev_wdg_init()). */
static struct czl_wdg_dev czl_wdg;

/*
 * Read handler of the read-only "czl_wdg_dev" sysfs attribute: writes a fixed
 * banner into @buf and returns its length.
 */
static ssize_t
czl_wdg_dev_show(struct device *dev, struct device_attribute *attr,
                 char *buf)
{
	static const char banner[] =
		"mdev emulated pci watchdog device by caozilong.\n";

	return sprintf(buf, "%s", banner);
}
static DEVICE_ATTR_RO(czl_wdg_dev);

/* Attributes of the parent device, NULL terminated. */
static struct attribute *wdg_dev_attrs[] = {
	&dev_attr_czl_wdg_dev.attr,
	NULL,
};

/* Attribute group named "czl_wdg" holding the parent-device attributes. */
static const struct attribute_group wdg_dev_group = {
	.name  = "czl_wdg",
	.attrs = wdg_dev_attrs,
};

/* Group list handed to the mdev core through wdg_mdev_fops.dev_attr_groups. */
static const struct attribute_group *wdg_dev_groups[] = {
	&wdg_dev_group,
	NULL,
};


/*
 * Read handler of the read-only "mdev_dev" sysfs attribute. Prints the device
 * name when @dev resolves to an mdev device, otherwise just a newline.
 */
static ssize_t
mdev_dev_show(struct device *dev, struct device_attribute *attr,
              char *buf)
{
	if (!mdev_from_dev(dev))
		return sprintf(buf, "\n");

	return sprintf(buf, "This is watchdog %s\n", dev_name(dev));
}

static DEVICE_ATTR_RO(mdev_dev);

/* Attributes attached to every created mdev device, NULL terminated. */
static struct attribute *mdev_dev_attrs[] = {
	&dev_attr_mdev_dev.attr,
	NULL,
};

/* Attribute group named "caozilong" holding the per-mdev attributes. */
static const struct attribute_group mdev_dev_group = {
	.name  = "caozilong",
	.attrs = mdev_dev_attrs,
};

/* Group list handed to the mdev core through wdg_mdev_fops.mdev_attr_groups. */
static const struct attribute_group *mdev_dev_groups[] = {
	&mdev_dev_group,
	NULL,
};


/*
 * "name" attribute of an mdev type. The type kobject name is compared with
 * "<driver string>-<n>" for n = 1..3 and the matching human readable label is
 * printed; an unknown type yields -EINVAL.
 */
static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
{
	const char *labels[3] = {"Soft Watchdog", "Hardware Watchdog", "Dummy Watchdog"};
	char type_name[128];
	int idx = 0;

	while (idx < 3) {
		snprintf(type_name, sizeof(type_name), "%s-%d",
		         dev_driver_string(dev), idx + 1);
		if (strcmp(kobj->name, type_name) == 0)
			return sprintf(buf, "%s\n", labels[idx]);
		idx++;
	}

	return -EINVAL;
}

/* "device_api" attribute of an mdev type: reports the VFIO PCI API string. */
static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
                               char *buf)
{
	const char *api = VFIO_DEVICE_API_PCI_STRING;

	return sprintf(buf, "%s\n", api);
}

static ssize_t
available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
{
	struct wdg_mdev_state *mds;
	int used = 0;

	list_for_each_entry(mds, &wdg_mdev_devices_list, next) {
		used ++;
	}

	return sprintf(buf, "%d\n", (MAX_WDGS - used));
}

/* Read-only mdev type attributes backed by the *_show() functions above. */
static MDEV_TYPE_ATTR_RO(name);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(available_instances);

/* Attribute set shared by all three mdev types, NULL terminated. */
static struct attribute *mdev_types_attrs[] = {
	&mdev_type_attr_name.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_available_instances.attr,
	NULL,
};

/*
 * Three supported mdev types named "1", "2" and "3". name_show() and
 * czl_wdg_create() match the resulting type kobject against
 * "<driver string>-<n>".
 */
static struct attribute_group mdev_type_group1 = {
	.name  = "1",
	.attrs = mdev_types_attrs,
};

static struct attribute_group mdev_type_group2 = {
	.name  = "2",
	.attrs = mdev_types_attrs,
};

static struct attribute_group mdev_type_group3 = {
	.name  = "3",
	.attrs = mdev_types_attrs,
};

/* Type list handed to the mdev core via wdg_mdev_fops.supported_type_groups. */
static struct attribute_group *mdev_type_groups[] = {
	&mdev_type_group1,
	&mdev_type_group2,
	&mdev_type_group3,
	NULL,
};

/* mdev open callback: nothing to set up, only logs the event. */
static int czl_wdg_open(struct mdev_device *mdev)
{
	pr_info("%s line %d, wdg device opened.\n", __func__, __LINE__);

	return 0;
}

/* mdev release callback: nothing to tear down, only logs the event. */
static void czl_wdg_close(struct mdev_device *mdev)
{
	pr_info("%s line %d, wdg device close.\n", __func__, __LINE__);
}

// fill pci config space meta data & capabilities.
int wdg_create_config_space(struct wdg_mdev_state *mstate)
{
	// vendor id, device id.
	*((unsigned int *)&mstate->config[0]) = CZL_WDG_DEVICE_VENDOR_ID |
	                                        (CZL_WDG_DEVICE_DEVICE_ID << 16);
	*((unsigned short *)&mstate->config[4]) = 0x0001;
	*((unsigned short *)&mstate->config[6]) = 0x0200;

	mstate->config[0x8] =  0x10;
	mstate->config[0x9] =  0x02;
	mstate->config[0xa] =  0x00;
	mstate->config[0xb] =  0x07;

	*((unsigned int *)&mstate->config[0x10]) = 0x000001;
	mstate->bar_mask[0] = ~(IO_BAR0_SIZE) + 1;
	*((unsigned int *)&mstate->config[0x2c]) = 0x10011af4;

	// cap ptr.
	mstate->config[0x34] =  0x00;
	mstate->config[0x3d] =  0x01;
	mstate->config[0x40] =  0x23;
	mstate->config[0x43] =  0x80;
	mstate->config[0x44] =  0x23;
	mstate->config[0x48] =  0x23;
	mstate->config[0x4c] =  0x23;
	mstate->config[0x60] =  0x50;

	mstate->config[0x61] =  0x43;
	mstate->config[0x62] =  0x49;
	mstate->config[0x63] =  0x20;
	mstate->config[0x64] =  0x53;
	mstate->config[0x65] =  0x65;
	mstate->config[0x66] =  0x72;
	mstate->config[0x67] =  0x69;
	mstate->config[0x68] =  0x61;
	mstate->config[0x69] =  0x6c;
	mstate->config[0x6a] =  0x2f;
	mstate->config[0x6b] =  0x55;
	mstate->config[0x6c] =  0x41;
	mstate->config[0x6d] =  0x52;
	mstate->config[0x6e] =  0x54;

	return 0;
}

static int czl_wdg_create(struct kobject *kobj, struct mdev_device *mdev)
{
	int i;
	struct wdg_mdev_state *mstate;
	char name[32];

	if (!mdev)
		return -EINVAL;

	for (i = 0; i < 3; i++) {
		snprintf(name, 32, "%s-%d", dev_driver_string(mdev_parent_dev(mdev)), i + 1);
		if (!strcmp(kobj->name, name)) {
			break;
		}
	}

	if (i >= 3) {
		return -EINVAL;
	}

	mstate = kzalloc(sizeof(struct wdg_mdev_state), GFP_KERNEL);
	if (mstate == NULL)
		return -ENOMEM;
	// group number in mdev_type.
	mstate->index = i + 1;
	mstate->config = kzalloc(IO_CONF_SIZE, GFP_KERNEL);
	if (mstate->config == NULL) {
		pr_err("%s line %d, alloc pci config buffer failure.\n",
		       __func__, __LINE__);
		kfree(mstate);
		return -ENOMEM;
	}

	mstate->iobase = kzalloc(IO_BAR0_SIZE, GFP_KERNEL);
	if (mstate->iobase == NULL) {
		pr_err("%s line %d, alloc pci io buffer failure.\n",
		       __func__, __LINE__);
		kfree(mstate->config);
		kfree(mstate);
		return -ENOMEM;
	}

	memset(mstate->config, 0x00, IO_CONF_SIZE);

	mutex_init(&mstate->ops_lock);
	mstate->mdev = mdev;
	mdev_set_drvdata(mdev, mstate);
	wdg_create_config_space(mstate);

	mutex_lock(&wdg_mdev_list_lock);
	list_add(&mstate->next, &wdg_mdev_devices_list);
	mutex_unlock(&wdg_mdev_list_lock);

	return 0;
}

static int czl_wdg_remove(struct mdev_device *mdev)
{
	struct wdg_mdev_state *mds, *tmp_mds;
	struct wdg_mdev_state *mstate = mdev_get_drvdata(mdev);

	int ret = -EINVAL;

	mutex_lock(&wdg_mdev_list_lock);
	list_for_each_entry_safe(mds, tmp_mds, &wdg_mdev_devices_list, next) {
		if (mstate == mds) {
			list_del(&mstate->next);
			mdev_set_drvdata(mdev, NULL);
			kfree(mstate->config);
			kfree(mstate->iobase);
			kfree(mstate);
			ret = 0;
			break;
		}
	}
	mutex_unlock(&wdg_mdev_list_lock);

	return ret;
}

/*
 * Emulate a write to the PCI configuration space.
 *
 * @mstate: instance whose config space is written.
 * @offset: config space offset of the access.
 * @buf:    source bytes; only @count bytes of it are valid.
 * @count:  access width in bytes.
 *
 * Only the interrupt line (0x3c) and BAR0 (0x10) are writable; BAR1, BAR2 and
 * BAR4 are forced to 0, command/status/interrupt-pin writes are ignored and
 * everything else is logged and dropped.
 */
static void handle_pci_cfg_space_write(struct wdg_mdev_state *mstate, u16 offset,
                                       u8 *buf, u32 count)
{
	u32 cfg_addr, bar_mask;

	switch (offset) {
	case 0x04: /* device control */
	case 0x06: /* device status */
		// do nothing
		break;
	case 0x3c:
		mstate->config[0x3c] = buf[0];
		break;
	case 0x3d:
		break;
	case 0x10:  /* BAR0 */
		/*
		 * The caller may issue 1- or 2-byte accesses, so never read a
		 * full dword from @buf: start from the current register value
		 * and overlay only the bytes that were really supplied.
		 */
		cfg_addr = *((unsigned int *)&mstate->config[offset]);
		memcpy(&cfg_addr, buf,
		       count < sizeof(cfg_addr) ? count : sizeof(cfg_addr));
		pr_info("BAR0 addr 0x%x\n", cfg_addr);
		/* An all-ones write is the BAR sizing probe: answer with the mask. */
		if (cfg_addr == 0xffffffff) {
			bar_mask = mstate->bar_mask[0];
			cfg_addr = (cfg_addr & bar_mask);
		}
		/* Keep the low two bits currently stored in the register. */
		cfg_addr |= (mstate->config[offset] & 0x3ul);
		*((unsigned int *)&mstate->config[offset]) = cfg_addr;
		break;
	case 0x14:  /* BAR1 */
	case 0x18:  /* BAR2 */
	case 0x20:  /* BAR4 */
		*((unsigned int *)&mstate->config[offset]) = 0;
		break;
	default:
		pr_info("PCI config write @0x%x of %d bytes not handled\n",
		        offset, count);
		break;

	}

	return;
}

/*
 * Emulate a read from the PCI configuration space.
 *
 * Copies @count bytes starting at @offset from the emulated config space into
 * @buf. The config buffer is only IO_CONF_SIZE bytes long, so any part of the
 * request that lies beyond it is filled with 0xff instead of reading past the
 * allocation; @buf is always fully initialised on return.
 */
static void handle_pci_cfg_space_read(struct wdg_mdev_state *mstate, u16 offset,
                                      u8 *buf, u32 count)
{
	u32 avail = 0;

	if (offset < IO_CONF_SIZE)
		avail = IO_CONF_SIZE - offset;
	if (avail > count)
		avail = count;

	memcpy(buf, (mstate->config + offset), avail);
	if (avail < count)
		memset(buf + avail, 0xff, count - avail);
	return;
}

static void mdev_read_base(struct wdg_mdev_state *mstate)
{
	int index, pos;
	u32 start_lo, start_hi;
	u32 mem_type;

	pos = PCI_BASE_ADDRESS_0;
	for (index = 0; index <= VFIO_PCI_BAR5_REGION_INDEX; index++)  {
		if (!mstate->region_info[index].size)
			continue;
		start_lo = (*(u32 *)(mstate->config + pos)) &
		           PCI_BASE_ADDRESS_MEM_MASK;
		mem_type = (*(u32 *)(mstate->config + pos)) &
		           PCI_BASE_ADDRESS_MEM_TYPE_MASK;

		switch (mem_type) {
		case PCI_BASE_ADDRESS_MEM_TYPE_64:
			start_hi = (*(u32 *)(mstate->config + pos + 4));
			pos += 4;
			break;
		case PCI_BASE_ADDRESS_MEM_TYPE_32:
		case PCI_BASE_ADDRESS_MEM_TYPE_1M:
		default:
			start_hi = 0;
			break;
		}
		pos += 4;
		mstate->region_info[index].start = ((u64)start_hi << 32) | start_lo;
	}

	return;
}

/*
 * Emulate a write to the BAR backing store.
 *
 * Copies @count bytes from @buf to @mstate->iobase + @offset. The backing
 * buffer is IO_BAR0_SIZE bytes long; an access that does not fit entirely
 * inside it is rejected (and logged) instead of overflowing the allocation.
 */
static void handle_bar_write(unsigned int index, struct wdg_mdev_state *mstate,
                             u16 offset, u8 *buf, u32 count)
{
	if (offset >= IO_BAR0_SIZE || count > IO_BAR0_SIZE - offset) {
		pr_err("%s line %d, bar %d, write offset 0x%x, count 0x%x out of range.\n",
		       __func__, __LINE__, index, offset, count);
		return;
	}

	pr_info("%s line %d, bar %d, write offset 0x%x, count 0x%x, val 0x%x.\n",
	        __func__, __LINE__, index, offset, count, *buf);
	memcpy(mstate->iobase + offset, buf, count);
	return;
}

/*
 * Emulate a read from the BAR backing store.
 *
 * Copies @count bytes from @mstate->iobase + @offset into @buf. An access
 * that does not fit inside the IO_BAR0_SIZE byte buffer returns 0xff bytes
 * rather than reading past the allocation. The value is logged after the
 * copy so the message shows the data actually returned.
 */
static void handle_bar_read(unsigned int index, struct wdg_mdev_state *mstate,
                            u16 offset, u8 *buf, u32 count)
{
	if (offset >= IO_BAR0_SIZE || count > IO_BAR0_SIZE - offset) {
		pr_err("%s line %d, bar %d, read offset 0x%x, count 0x%x out of range.\n",
		       __func__, __LINE__, index, offset, count);
		memset(buf, 0xff, count);
		return;
	}

	memcpy(buf, mstate->iobase + offset, count);
	pr_info("%s line %d, bar %d, read offset 0x%x, count 0x%x, val 0x%x.\n",
	        __func__, __LINE__, index, offset, count, *buf);
	return;
}

/*
 * Perform one emulated access to a VFIO region of @mdev.
 *
 * @mdev:     device being accessed.
 * @buf:      kernel buffer holding the data to write / receiving the data read.
 * @count:    number of bytes to transfer.
 * @pos:      VFIO file offset; the region index is encoded in the upper bits.
 * @is_write: true for a write, false for a read.
 *
 * Only the config space region (IO_CONF_SIZE bytes) and BAR0 (IO_BAR0_SIZE
 * bytes) are backed by memory. The access is validated against the region
 * size using the full 64-bit offset, before the offset is narrowed to the
 * u16 taken by the handlers, so a large offset cannot alias into the buffer.
 *
 * Returns @count on success or -EINVAL for bad arguments, an unsupported
 * region or an out-of-range access.
 */
static ssize_t mdev_access(struct mdev_device *mdev, u8 *buf, size_t count,
                           loff_t pos, bool is_write)
{
	unsigned int index;
	loff_t offset;
	size_t region_size;
	struct wdg_mdev_state *mstate;

	if (!mdev || !buf)
		return -EINVAL;

	mstate = mdev_get_drvdata(mdev);
	if (!mstate) {
		pr_err("%s line %d. get mstate failure.\n", __func__, __LINE__);
		return -EINVAL;
	}

	index = WDG_VFIO_PCI_OFFSET_TO_INDEX(pos);
	offset = pos & WDG_VFIO_PCI_OFFSET_MASK;

	switch (index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		region_size = IO_CONF_SIZE;
		break;
	case VFIO_PCI_BAR0_REGION_INDEX:
		region_size = IO_BAR0_SIZE;
		break;
	default:
		/* every other region is reported with size 0 */
		region_size = 0;
		break;
	}

	if ((u64)offset >= region_size || count > region_size - (u64)offset) {
		pr_err("%s: region %d access at 0x%llx, len 0x%zx rejected.\n",
		       __func__, index, offset, count);
		return -EINVAL;
	}

	mutex_lock(&mstate->ops_lock);
	if (index == VFIO_PCI_CONFIG_REGION_INDEX) {
		pr_info("%s: PCI config space %s at offset 0x%llx\n",
		        __func__, is_write ? "write" : "read", offset);
		if (is_write) {
			handle_pci_cfg_space_write(mstate, offset, buf, count);
		} else {
			handle_pci_cfg_space_read(mstate, offset, buf, count);
		}
	} else {
		/* Lazily decode the BAR bases the first time BAR0 is touched. */
		if (!mstate->region_info[index].start)
			mdev_read_base(mstate);
		if (is_write) {
			pr_info("%s: write bar%d offset 0x%llx, val 0x%x.\n",
			        __func__, index, offset, *buf);
			handle_bar_write(index, mstate, offset, buf, count);
		} else {
			handle_bar_read(index, mstate, offset, buf, count);
			/* log after the read so the value printed is valid */
			pr_info("%s: read bar%d offset 0x%llx, val 0x%x.\n",
			        __func__, index, offset, *buf);
		}
	}
	mutex_unlock(&mstate->ops_lock);

	return count;
}

/*
 * mdev read callback: copy @count bytes starting at *@ppos from the emulated
 * device to user space.
 *
 * The request is split into naturally aligned 4-, 2- or 1-byte accesses, each
 * forwarded to mdev_access(). *@ppos is advanced by the bytes transferred.
 *
 * Returns the number of bytes read, the error reported by mdev_access(), or
 * -EFAULT when the user buffer cannot be written.
 */
static ssize_t czl_wdg_read(struct mdev_device *mdev, char __user *buf,
                            size_t count, loff_t *ppos)
{
	size_t done = 0;
	ssize_t ret;

	pr_info("%s line %d, read count 0x%zx, pos 0x%llx.\n", __func__, __LINE__, count, *ppos);
	while (count) {
		u32 val = 0;
		size_t width;

		/* Widest access that is both available and aligned. */
		if (count >= 4 && !(*ppos % 4))
			width = 4;
		else if (count >= 2 && !(*ppos % 2))
			width = 2;
		else
			width = 1;

		ret = mdev_access(mdev, (u8 *)&val, width, *ppos, false);
		if (ret <= 0) {
			if (!ret)
				ret = -EFAULT;
			goto read_err;
		}
		/* The first @width bytes of val hold the data, in memory order. */
		if (copy_to_user(buf, &val, width)) {
			ret = -EFAULT;
			goto read_err;
		}

		count -= width;
		done += width;
		*ppos += width;
		buf += width;
	}

	pr_info("%s line %d, read count 0x%zx.\n", __func__, __LINE__, done);
	return done;

read_err:
	pr_err("%s line %d, read err happend.\n", __func__, __LINE__);
	return ret;
}

/*
 * mdev write callback: copy @count bytes from user space to the emulated
 * device starting at *@ppos.
 *
 * The request is split into naturally aligned 4-, 2- or 1-byte accesses, each
 * forwarded to mdev_access(). *@ppos is advanced by the bytes transferred.
 *
 * Returns the number of bytes written, the error reported by mdev_access(),
 * or -EFAULT when the user buffer cannot be read.
 */
static ssize_t czl_wdg_write(struct mdev_device *mdev, const char __user *buf,
                             size_t count, loff_t *ppos)
{
	size_t done = 0;
	ssize_t ret;

	pr_info("%s line %d, write count 0x%zx, pos 0x%llx.\n", __func__, __LINE__, count, *ppos);
	while (count) {
		u32 val = 0;
		size_t width;

		/* Widest access that is both available and aligned. */
		if (count >= 4 && !(*ppos % 4))
			width = 4;
		else if (count >= 2 && !(*ppos % 2))
			width = 2;
		else
			width = 1;

		if (copy_from_user(&val, buf, width)) {
			ret = -EFAULT;
			goto write_err;
		}

		ret = mdev_access(mdev, (u8 *)&val, width, *ppos, true);
		if (ret <= 0) {
			if (!ret)
				ret = -EFAULT;
			goto write_err;
		}

		count -= width;
		done += width;
		*ppos += width;
		buf += width;
	}

	pr_info("%s line %d, write count 0x%zx.\n", __func__, __LINE__, done);
	return done;

write_err:
	pr_err("%s line %d, write failure.\n", __func__, __LINE__);
	return ret;
}

/*
 * Fill @dev_info for VFIO_DEVICE_GET_INFO: a PCI-type device exposing the
 * standard VFIO PCI region and IRQ counts. Always returns 0.
 */
static int wdg_get_device_info(struct mdev_device *mdev, struct vfio_device_info *dev_info)
{
	dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
	dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
	dev_info->flags = VFIO_DEVICE_FLAGS_PCI;

	return 0;
}

/*
 * Fill @region_info for VFIO_DEVICE_GET_REGION_INFO and remember the size and
 * offset in the instance state.
 *
 * The config region is IO_CONF_SIZE bytes, BAR0 is IO_BAR0_SIZE bytes and all
 * other regions have size 0. Every region is reported readable and writable.
 * Returns 0, or -EINVAL for a NULL @mdev, missing state or bad region index.
 */
static int wdg_get_region_info(struct mdev_device *mdev, struct vfio_region_info *region_info)
{
	struct wdg_mdev_state *mstate;
	unsigned int bytes;
	u32 idx;

	if (!mdev) {
		pr_err("%s line %d,mdev is null.\n", __func__, __LINE__);
		return -EINVAL;
	}

	mstate = mdev_get_drvdata(mdev);
	if (!mstate) {
		pr_err("%s line %d,mstat is null.\n", __func__, __LINE__);
		return -EINVAL;
	}

	idx = region_info->index;
	if (idx >= VFIO_PCI_NUM_REGIONS) {
		pr_err("%s line %d,bar index %d exceeds.\n", __func__, __LINE__, idx);
		return -EINVAL;
	}

	mutex_lock(&mstate->ops_lock);

	if (idx == VFIO_PCI_CONFIG_REGION_INDEX)
		bytes = IO_CONF_SIZE;
	else if (idx == VFIO_PCI_BAR0_REGION_INDEX)
		bytes = IO_BAR0_SIZE;
	else
		bytes = 0;

	/* Cache what is handed out so later accesses can consult it. */
	mstate->region_info[idx].size = bytes;
	mstate->region_info[idx].vfio_offset = WDG_VFIO_PCI_INDEX_TO_OFFSET(idx);

	region_info->size = bytes;
	region_info->offset = WDG_VFIO_PCI_INDEX_TO_OFFSET(idx);
	region_info->flags = VFIO_REGION_INFO_FLAG_READ |
	                     VFIO_REGION_INFO_FLAG_WRITE;

	mutex_unlock(&mstate->ops_lock);

	return 0;
}

/*
 * Fill @irq_info for VFIO_DEVICE_GET_IRQ_INFO.
 *
 * Only the INTx, MSI and REQ indexes are accepted; each reports one
 * eventfd-capable interrupt. INTx is additionally maskable/automasked, the
 * others are marked NORESIZE. Returns 0 or -EINVAL for any other index.
 */
static int wdg_get_irq_info(struct mdev_device *mdev, struct vfio_irq_info *irq_info)
{
	int is_intx = (irq_info->index == VFIO_PCI_INTX_IRQ_INDEX);

	if (!is_intx &&
	    irq_info->index != VFIO_PCI_MSI_IRQ_INDEX &&
	    irq_info->index != VFIO_PCI_REQ_IRQ_INDEX) {
		pr_err("%s line %d, irq idx %d is invalid.\n",
		       __func__, __LINE__, irq_info->index);
		return -EINVAL;
	}

	irq_info->count = 1;
	if (is_intx)
		irq_info->flags = VFIO_IRQ_INFO_EVENTFD |
		                  VFIO_IRQ_INFO_MASKABLE |
		                  VFIO_IRQ_INFO_AUTOMASKED;
	else
		irq_info->flags = VFIO_IRQ_INFO_EVENTFD |
		                  VFIO_IRQ_INFO_NORESIZE;

	return 0;
}

static long czl_wdg_ioctl(struct mdev_device *mdev, unsigned int cmd,
                          unsigned long arg)
{
	int ret = 0;
	unsigned long minsz;
	struct wdg_mdev_state *mstate;

	pr_info("czl wdg ioctl enter.\n");

	if (!mdev) {
		pr_err("%s line %d, mdev is null.\n", __func__, __LINE__);
		return -EINVAL;
	}

	mstate = mdev_get_drvdata(mdev);
	if (!mstate) {
		pr_err("%s line %d, cant find mstate data.\n", __func__, __LINE__);
		return -ENODEV;
	}

	switch (cmd) {
	case VFIO_DEVICE_GET_INFO: {
		struct vfio_device_info info;
		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;
		if (info.argsz < minsz) {
			pr_err("%s line %d, info.argsz %d < minsz %ld.\n",
			       __func__, __LINE__, info.argsz, minsz);
			return -EINVAL;
		}

		ret = wdg_get_device_info(mdev, &info);
		if (ret) {
			pr_err("%s line %d, get device info failure.\n", __func__, __LINE__);
			return ret;
		}
		memcpy(&mstate->dev_info, &info, sizeof(info));
		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;
		return 0;
	}
	case VFIO_DEVICE_GET_REGION_INFO: {
		struct vfio_region_info info;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;
		if (info.argsz < minsz) {
			pr_err("%s line %d, info.argsz %d < minsz %ld.\n",
			       __func__, __LINE__, info.argsz, minsz);
			return -EINVAL;
		}

		ret = wdg_get_region_info(mdev, &info);
		if (ret) {
			pr_err("%s line %d, get region info failure.\n", __func__, __LINE__);
			return ret;
		}

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;
		return 0;
	}
	case VFIO_DEVICE_GET_IRQ_INFO: {
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);
		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;
		if ((info.argsz < minsz) ||
		    (info.index >= mstate->dev_info.num_irqs))
			return -EINVAL;
		ret = wdg_get_irq_info(mdev, &info);
		if (ret)
			return ret;
		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;
		return 0;
	}
	case VFIO_DEVICE_SET_IRQS: {
		pr_info("%s line %d, set irqs.\n", __func__, __LINE__);
		return 0;
	}
	case VFIO_DEVICE_RESET:
		pr_info("%s line %d, reset.\n", __func__, __LINE__);
		return 0;
	}

	return -EINVAL;
}

/*
 * Callback table registered with mdev_register_device(): sysfs attribute
 * groups, the supported mdev types and the per-instance lifecycle and I/O
 * handlers defined above.
 */
static const struct mdev_parent_ops wdg_mdev_fops = {
	.owner                  = THIS_MODULE,
	.dev_attr_groups        = wdg_dev_groups,
	.mdev_attr_groups       = mdev_dev_groups,
	.supported_type_groups  = mdev_type_groups,
	.create                 = czl_wdg_create,
	.remove                 = czl_wdg_remove,
	.open                   = czl_wdg_open,
	.release                = czl_wdg_close,
	.read                   = czl_wdg_read,
	.write                  = czl_wdg_write,
	.ioctl                  = czl_wdg_ioctl,
};

/*
 * Release callback of the parent device. The device is embedded in the static
 * czl_wdg object, so there is nothing to free; the call is only logged.
 */
static void wdg_device_release(struct device *dev)
{
	static const char msg[] = "czl wdg devide release.\n";

	pr_info("%s", msg);
}

/*
 * Module init: set up the char device region, class and parent device, then
 * register the parent with the mdev core.
 *
 * The instance list and its lock are initialised first because the mdev
 * sysfs attributes and the create callback can run as soon as
 * mdev_register_device() returns. Every step is checked and undone in
 * reverse order on failure. Returns 0 or a negative errno.
 */
static int mdev_wdg_init(void)
{
	int ret;

	pr_info("czl wdg init.\n");

	memset(&czl_wdg, 0x00, sizeof(czl_wdg));

	mutex_init(&wdg_mdev_list_lock);
	INIT_LIST_HEAD(&wdg_mdev_devices_list);

	ret = alloc_chrdev_region(&czl_wdg.wdg_devt, 0, MINORMASK + 1, "czl_wdg");
	if (ret < 0) {
		pr_err("error: failed to register czl wdg device, err:%d\n", ret);
		return ret;
	}

	cdev_init(&czl_wdg.wdg_cdev, &czl_wdg_fops);
	ret = cdev_add(&czl_wdg.wdg_cdev, czl_wdg.wdg_devt, MINORMASK + 1);
	if (ret) {
		pr_err("error: failed to add czl wdg cdev, err:%d\n", ret);
		goto err_chrdev;
	}

	pr_info("major_number:%d\n", MAJOR(czl_wdg.wdg_devt));

	czl_wdg.wdg_class = class_create(THIS_MODULE, "czl_wdg");
	if (IS_ERR(czl_wdg.wdg_class)) {
		pr_err("error: failed to create wdg class.\n");
		ret = PTR_ERR(czl_wdg.wdg_class);
		goto err_cdev;
	}

	czl_wdg.dev.class = czl_wdg.wdg_class;
	czl_wdg.dev.release = wdg_device_release;
	dev_set_name(&czl_wdg.dev, "%s", "czl_wdg");
	ret = device_register(&czl_wdg.dev);
	if (ret) {
		pr_err("%s line %d, register wdg device failure.\n", __func__, __LINE__);
		/* a failed device_register() must be balanced by put_device() */
		put_device(&czl_wdg.dev);
		goto err_class;
	}

	ret = mdev_register_device(&czl_wdg.dev, &wdg_mdev_fops);
	if (ret) {
		pr_err("%s line %d, register wdg mdev device failure.\n", __func__, __LINE__);
		goto err_device;
	}

	pr_info("czl wdg init success.\n");
	return 0;

err_device:
	device_unregister(&czl_wdg.dev);
err_class:
	class_destroy(czl_wdg.wdg_class);
err_cdev:
	cdev_del(&czl_wdg.wdg_cdev);
err_chrdev:
	unregister_chrdev_region(czl_wdg.wdg_devt, MINORMASK + 1);
	return ret;
}

/*
 * Module exit: unregister from the mdev core, then release the parent
 * device, char device, device number region and class.
 */
static void mdev_wdg_exit(void)
{
	/* NOTE(review): purpose of clearing .bus is not evident here - confirm */
	czl_wdg.dev.bus = NULL;
	mdev_unregister_device(&czl_wdg.dev);
	device_unregister(&czl_wdg.dev);
	cdev_del(&czl_wdg.wdg_cdev);
	unregister_chrdev_region(czl_wdg.wdg_devt, MINORMASK + 1);
	/* the class goes last, after the device that belongs to it */
	class_destroy(czl_wdg.wdg_class);
	czl_wdg.wdg_class = NULL;

	pr_info("czl_wdg_unload.\n");
	return;
}

/* Module entry/exit points and license of the host-side mdev parent driver. */
module_init(mdev_wdg_init)
module_exit(mdev_wdg_exit)
MODULE_LICENSE("GPL v2");

virtual machine (guest) watchdog pci driver:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/cdev.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/uuid.h>
#include <linux/vfio.h>
#include <linux/iommu.h>
#include <linux/sysfs.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/mdev.h>
#include <linux/pci.h>
#include <linux/idr.h>

/* Char device major number returned by register_chrdev() in czl_wdg_init(). */
static int devno;
/* Maps a minor number to its struct wdg_pci_state; guarded by wdg_minors_lock. */
static DEFINE_IDR(wdg_minors);
static DEFINE_MUTEX(wdg_minors_lock);
/* Exclusive upper bound passed to idr_alloc() when allocating a minor. */
#define WDG_MINORS_COUNT 256

struct wdg_pci_state {
	struct pci_dev *pdev;
	struct device *dev;
	int iobase;
	int iolen;
	int major;
	int minor;
};

/* Device class under which the "czl-wdg-<minor>" devices are created. */
static struct class *wdg_class;
/*
 * PCI IDs handled by this driver: vendor 0xbeef, device 0x1001, followed by
 * the terminating zero entry.
 */
static const struct pci_device_id czl_pci_table[] = {
	{       PCI_DEVICE(0xbeef, 0x1001),       },
	{ 0,                                      }
};

static int czl_wdg_open(struct inode *inode, struct file *file)
{
	int rc = 0;
	int major, minor;

	major = imajor(inode);
	minor = iminor(inode);
	mutex_lock(&wdg_minors_lock);
	file->private_data = idr_find(&wdg_minors, minor);
	mutex_unlock(&wdg_minors_lock);
	if (!file->private_data) {
		pr_err("%s line %d, cant find wdg structure.\n",
		       __func__, __LINE__);
		rc = -1;
	}

	return rc;
}

/* Char device release: no per-open state to drop, always returns 0. */
static int czl_wdg_release(struct inode *inode, struct file *file)
{
	return 0;
}

/*
 * Char device read: read up to @size bytes from the device's I/O port window,
 * starting at port offset *@ppos, one byte at a time with inb().
 *
 * The transfer is clamped to the end of the window, so the temporary buffer
 * is never larger than the window regardless of the size requested.
 * *@ppos is not advanced by this function.
 *
 * Returns the number of bytes read (0 at the end of the window), -ENODEV
 * without device state, -EINVAL for a position outside the window, -ENOMEM
 * on allocation failure or -EFAULT when the user buffer cannot be written.
 */
ssize_t czl_wdg_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
{
	struct wdg_pci_state *wdgdev = file->private_data;
	unsigned char *kbuf;
	size_t len;
	size_t i;
	ssize_t ret;

	if (!wdgdev) {
		pr_err("%s line %d, read failure.\n", __func__, __LINE__);
		return -ENODEV;
	}

	if (*ppos < 0 || *ppos > wdgdev->iolen) {
		pr_err("%s line %d, read pos %lld exceed max io len %d.\n",
		       __func__, __LINE__, *ppos, wdgdev->iolen);
		return -EINVAL;
	}

	len = wdgdev->iolen - *ppos;
	if (len > size)
		len = size;
	if (!len)
		return 0;

	/* kzalloc() takes the size first and the GFP flags second. */
	kbuf = kzalloc(len, GFP_KERNEL);
	if (kbuf == NULL) {
		pr_err("%s line %d, alloc kbuf failure.\n",
		       __func__, __LINE__);
		return -ENOMEM;
	}

	for (i = 0; i < len; i++)
		kbuf[i] = inb(wdgdev->iobase + *ppos + i);

	ret = len;
	if (copy_to_user(buf, kbuf, len))
		ret = -EFAULT;

	kfree(kbuf);
	return ret;
}

/*
 * Char device write: write up to @count bytes from user space to the device's
 * I/O port window, starting at port offset *@ppos, one byte at a time with
 * outb().
 *
 * The transfer is clamped to the end of the window, so the temporary buffer
 * is never larger than the window regardless of the size requested.
 * *@ppos is not advanced by this function.
 *
 * Returns the number of bytes written (0 at the end of the window), -ENODEV
 * without device state, -EINVAL for a position outside the window, -ENOMEM
 * on allocation failure or -EFAULT when the user buffer cannot be read.
 */
static ssize_t czl_wdg_write(struct file *file, const char __user *buf,
                             size_t count, loff_t *ppos)
{
	struct wdg_pci_state *wdgdev = file->private_data;
	unsigned char *kbuf;
	size_t len;
	size_t i;
	ssize_t ret;

	if (!wdgdev) {
		pr_err("%s line %d, write failure.\n", __func__, __LINE__);
		return -ENODEV;
	}

	if (*ppos < 0 || *ppos > wdgdev->iolen) {
		pr_err("%s line %d, write pos %lld exceed max io len %d.\n",
		       __func__, __LINE__, *ppos, wdgdev->iolen);
		return -EINVAL;
	}

	len = wdgdev->iolen - *ppos;
	if (len > count)
		len = count;
	if (!len)
		return 0;

	/* kzalloc() takes the size first and the GFP flags second. */
	kbuf = kzalloc(len, GFP_KERNEL);
	if (kbuf == NULL) {
		pr_err("%s line %d, alloc kbuf failure.\n",
		       __func__, __LINE__);
		return -ENOMEM;
	}

	if (copy_from_user(kbuf, buf, len)) {
		ret = -EFAULT;
		goto out;
	}

	for (i = 0; i < len; i++)
		outb((u8)kbuf[i], wdgdev->iobase + *ppos + i);
	ret = len;

out:
	kfree(kbuf);
	return ret;
}

/* File operations of the /dev "czl-wdg-<minor>" char devices. */
static const struct file_operations czl_wdg_fops = {
	.owner          = THIS_MODULE,
	.open           = czl_wdg_open,
	.release        = czl_wdg_release,
	.read           = czl_wdg_read,
	.write          = czl_wdg_write,
};

/*
 * Class devnode callback: names the device node after the device and makes
 * it readable and writable for everyone.
 *
 * The mode is 0666; the previous value 06666 additionally set the setuid and
 * setgid bits, which have no purpose on a device node.
 *
 * Returns a kmalloc'ed node name (NULL on allocation failure).
 */
static char *wdg_devnode(struct device *dev, umode_t *mode)
{
	if (mode)
		*mode = 0666;
	return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
}

static int wdg_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct wdg_pci_state *wdgdev = NULL;

	pr_info("%s line %d, wdg pci device & driver binding.\n", __func__, __LINE__);

	wdgdev = kzalloc(GFP_KERNEL, sizeof(*wdgdev));
	if (!wdgdev) {
		pr_err("%s line %d, fail to alloc buffer.\n",
		       __func__, __LINE__);
		goto err0;
	}

	wdgdev->major = devno;

	wdgdev->pdev = pci_dev_get(pdev);
	wdgdev->iobase = pci_resource_start(pdev, 0);
	wdgdev->iolen = pci_resource_len(pdev, 0);
	mutex_lock(&wdg_minors_lock);
	wdgdev->minor = idr_alloc(&wdg_minors, wdgdev, 0, WDG_MINORS_COUNT, GFP_KERNEL);
	mutex_unlock(&wdg_minors_lock);
	if (wdgdev->minor < 0) {
		pr_err("%s line %d, get minor failure from idr.\n", __func__, __LINE__);
		goto err1;
	}

	pr_info("%s line %d, major %d, minor %d, iobase 0x%x.\n", __func__, __LINE__,
	        devno, wdgdev->minor, wdgdev->iobase);
	wdgdev->dev = device_create(wdg_class, NULL, MKDEV(devno, wdgdev->minor),
	                            NULL, "czl-wdg-%d", wdgdev->minor);
	if (!wdgdev->dev || IS_ERR(wdgdev->dev)) {
		pr_err("%s line %d, create wdg device failure.\n",
		       __func__, __LINE__);
		goto err2;
	}

	pci_set_drvdata(pdev, wdgdev);
	return 0;
err2:
	idr_remove(&wdg_minors, wdgdev->minor);
err1:
	if (wdgdev) {
		kfree(wdgdev);
	}
err0:
	return -1;
}

static void wdg_pci_remove(struct pci_dev *pdev)
{
	struct wdg_pci_state *wdgdev;

	pr_info("%s line %d, wdg pci device & driver removing.\n", __func__, __LINE__);

	wdgdev = pci_get_drvdata(pdev);
	pci_set_drvdata(pdev, NULL);
	pci_dev_put(pdev);
	wdgdev->pdev = NULL;
	device_destroy(wdg_class, MKDEV(devno, wdgdev->minor));
	idr_remove(&wdg_minors, wdgdev->minor);
	kfree(wdgdev);

	return;
}

/* PCI driver description: binds the IDs in czl_pci_table to probe/remove. */
static struct pci_driver czl_wdg_driver = {
	.name           = "czl-mdev-wdg",
	.id_table       = czl_pci_table,
	.probe          = wdg_pci_probe,
	.remove         = wdg_pci_remove,
};
/*
 * Module init: create the device class, register the char device major and
 * register the PCI driver.
 *
 * class_create() reports failure with an ERR_PTR value (never NULL), so the
 * result is tested with IS_ERR(). If a later step fails, the earlier ones
 * are undone. Returns 0 or a negative errno.
 */
static int czl_wdg_init(void)
{
	int ret;

	wdg_class = class_create(THIS_MODULE, "czl-wdg");
	if (IS_ERR(wdg_class)) {
		pr_err("%s line %d, create watchdog class failure.\n",
		       __func__, __LINE__);
		return PTR_ERR(wdg_class);
	}

	wdg_class->devnode = wdg_devnode;

	devno = register_chrdev(0, "czl-wdg", &czl_wdg_fops);
	if (devno < 0) {
		pr_err("%s line %d, register wdg device chrno failure.\n",
		       __func__, __LINE__);
		ret = devno;
		goto err_class;
	}

	ret = pci_register_driver(&czl_wdg_driver);
	if (ret) {
		pr_err("%s line %d, register wdg pci driver failure.\n",
		       __func__, __LINE__);
		goto err_chrdev;
	}

	return 0;

err_chrdev:
	unregister_chrdev(devno, "czl-wdg");
err_class:
	class_destroy(wdg_class);
	return ret;
}

/*
 * Module exit: unregister the PCI driver, then release the char device
 * major, the device class and the minor-number idr.
 */
static void czl_wdg_exit(void)
{
	pci_unregister_driver(&czl_wdg_driver);

	unregister_chrdev(devno, "czl-wdg");
	class_destroy(wdg_class);

	idr_destroy(&wdg_minors);
}

/* Module entry/exit points and license of the guest-side PCI driver. */
module_init(czl_wdg_init)
module_exit(czl_wdg_exit)
MODULE_LICENSE("GPL v2");

virtual machine user space test case:

#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdarg.h>

/*
 * Hex-dump the first @len bytes of @buf to stdout, 16 bytes per row, each row
 * prefixed with its starting offset. A final newline is always printed.
 */
void dump_buf(unsigned char *buf, int len)
{
	const unsigned char *cur = buf;
	int pos = 0;

	while (pos < len) {
		/* start a new row with its offset every 16 bytes */
		if ((pos % 16) == 0)
			printf("\n0x%04x: ", pos);
		printf("0x%02x ", *cur);
		cur++;
		pos++;
	}

	printf("\n");
}

/*
 * Test loop for the guest watchdog driver: once per second read the 32-byte
 * window of /dev/czl-wdg-0 from offset 0, dump what was read, then overwrite
 * the window with 0x5a bytes.
 *
 * Returns -1 if the device cannot be opened or an I/O call fails; the device
 * is closed on every exit path.
 */
int main(void)
{
	int wdgfd;
	int status;
	unsigned char buf[32];

	wdgfd = open("/dev/czl-wdg-0", O_RDWR);
	if (wdgfd < 0) {
		printf("%s line %d, open failure.\n",
		       __func__, __LINE__);
		return -1;
	}

	while (1) {
		memset(buf, 0x00, 32);

		/* always read from the start of the window */
		lseek(wdgfd, 0, SEEK_SET);
		status = read(wdgfd, buf, 32);
		if (status < 0) {
			printf("%s line %d, read failure.\n",
			       __func__, __LINE__);
			close(wdgfd);
			return -1;
		}

		printf("%s line %d, read %d.\n", __func__, __LINE__, status);

		/* dump only the bytes that were actually read */
		dump_buf(buf, status);

		memset(buf, 0x5a, 32);
		lseek(wdgfd, 0, SEEK_SET);
		status = write(wdgfd, buf, 32);
		if (status < 0) {
			printf("%s line %d, write failure.\n",
			       __func__, __LINE__);
			close(wdgfd);
			return -1;
		}
		printf("%s line %d, write %d.\n", __func__, __LINE__, status);

		sleep(1);
	}

	close(wdgfd);
	return 0;
}

测试过程:

1.安装WDG MDEV驱动:

sudo insmod czl-mdev-wdg.ko

2.创建mdev设备

创建两个mdev设备

echo "f422fd86-35c0-11ef-8e50-9342c1138a56" > /sys/devices/virtual/czl_wdg/czl_wdg/mdev_supported_types/czl_wdg-1/create
echo "c04de378-35d8-11ef-95c3-339660dfc874" > /sys/devices/virtual/czl_wdg/czl_wdg/mdev_supported_types/czl_wdg-2/create

3.将第二步创建的mdev设备透传给QEMU虚拟机并启动:

qemu-system-x86_64 -m 4096 -smp 4 --enable-kvm -drive file=/home/zlcao/Workspace/iso/ps.img -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/f422fd86-35c0-11ef-8e50-9342c1138a56 -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/c04de378-35d8-11ef-95c3-339660dfc874

系统启动后,可以看到虚拟机环境下出现了透传的MDEV PCI设备,设备vendor/device id为0xbeef1001,符合代码设定。

4.虚拟机内安装WDG PCI设备驱动:

上图中可以看到,两个透传的MDEV设备已经和一个名为"serial"的PCI设备驱动绑定,这并不符合预期,需要将默认的"serial"驱动和MDEV设备解绑,在QEMU虚拟机控制台中输入如下命令解绑驱动:

echo -n 0000:00:04.0 > /sys/bus/pci/drivers/serial/unbind
echo -n 0000:00:05.0 > /sys/bus/pci/drivers/serial/unbind

之后就可以安装我们的WDG PCI驱动了:

sudo insmod czl-mdev-drv.ko

安装成功后,虚拟机设备目录下出现了WDG PCI的设备节点:

此时,两个MDEV PCI设备也显示绑定到了正确的驱动:

5.运行测试用例,读写WDG PCI设备的BAR0地址空间:

此时可以看到,虚拟机中对WDG设备BAR0空间的读写调用被“透传”到了HOST机的MDEV PCI设备驱动上,可以基于对BAR0空间的回调实现我们的业务逻辑。


结束

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mfbz.cn/a/756689.html

如若内容造成侵权/违法违规/事实不符,请联系我们进行投诉反馈qq邮箱809451989@qq.com,一经查实,立即删除!

相关文章

yolov8obb角度预测原理解析

预测头 ultralytics/nn/modules/head.py class OBB(Detect):"""YOLOv8 OBB detection head for detection with rotation models."""def __init__(self, nc80, ne1, ch()):"""Initialize OBB with number of classes nc and la…

【Dison夏令营 Day 02】使用 Python 玩井字游戏

在本文中&#xff0c;我们将介绍使用 Python 语言从零开始创建井字游戏的步骤。 在本文中&#xff0c;我们将介绍使用 Python 语言从零开始创建井字游戏的步骤。 游戏简介 井字游戏是一种双人游戏&#xff0c;在 33 正方形网格上进行。每位玩家轮流占据一个单元格&#xff0c…

CMake(1)基础使用

CMake之(1)基础使用 Author: Once Day Date: 2024年6月29日 一位热衷于Linux学习和开发的菜鸟&#xff0c;试图谱写一场冒险之旅&#xff0c;也许终点只是一场白日梦… 漫漫长路&#xff0c;有人对你微笑过嘛… 全系列文章可参考专栏: Linux实践记录_Once-Day的博客-CSDN博客…

双指针算法第一弹(移动零 复写零 快乐数)

目录 前言 1. 移动零 &#xff08;1&#xff09;题目及示例 &#xff08;2&#xff09;一般思路 &#xff08;3&#xff09;双指针解法 2. 复写零 &#xff08;1&#xff09;题目及示例 &#xff08;2&#xff09;一般解法 &#xff08;3&#xff09;双指针解法 3. 快…

计算机基础知识——C基础+C指针+char类型

指针 这里讲的很细 https://blog.csdn.net/weixin_43624626/article/details/130715839 内存地址&#xff1a;内存中每个字节单位都有一个编号&#xff08;一般用十六进制表示&#xff09; 存储类型 数据类型 *指针变量名&#xff1b;int *p; //定义了一个指针变量p,指向的数…

在Redis中使用Lua脚本实现多条命令的原子性操作

Redis作为一个高性能的键值对数据库&#xff0c;被广泛应用于各种场景。然而&#xff0c;在某些情况下&#xff0c;我们需要执行一系列Redis命令&#xff0c;并确保这些命令的原子性。这时&#xff0c;Lua脚本就成为了一个非常实用的解决方案。 问题的提出 假设我们有一个计数…

【深度学习】图形模型基础(2):概率机器学习模型与人工智能

1.引言 1.1.背景 当机器需要从经验中汲取知识时&#xff0c;概率建模成为了一个至关重要的工具。它不仅为理解学习机制提供了理论框架&#xff0c;而且在实际应用中&#xff0c;特别是在设计能够从数据中学习的机器时&#xff0c;概率建模展现出了其独特的价值。概率框架的核…

Power BI可视化表格矩阵如何保持样式导出数据?

故事背景&#xff1a; 有朋友留言询问&#xff1a;自己从Power BI可视化矩阵表格中导出数据时&#xff0c;导出的表格样式会发生改变&#xff0c;需要线下再手动调整&#xff0c;重新进行透视组合成自己想要的格式。 有没有什么办法让表格导出来跟可视化一样&#xff1f; Po…

汽车电子工程师入门系列——CAN 规范系列通读

我是穿拖鞋的汉子,魔都中坚持长期主义的汽车电子工程师。 老规矩,分享一段喜欢的文字,避免自己成为高知识低文化的工程师: 屏蔽力是信息过载时代一个人的特殊竞争力,任何消耗你的人和事,多看一眼都是你的不对。非必要不费力证明自己,无利益不试图说服别人,是精神上的节…

SiteSucker Pro for Mac:一键下载整站,轻松备份与离线浏览!

SiteSucker Pro for Mac是一款专为苹果电脑用户设计的网站下载与备份工具&#x1f578;️。它以其强大的整站下载能力和用户友好的界面&#xff0c;成为了众多Mac用户备份网站、离线浏览的得力助手&#x1f4bb;。 这款软件允许用户一键下载整个网站&#xff0c;包括所有的网页…

Docker(八)-Docker运行mysql8容器实例

1.运行mysql8容器实例并挂载数据卷 -e:配置环境变量 --lower_case_table_names1 设置忽略表名大小写一定要放在镜像之后运行mysql8容器实例之前&#xff0c;先查看是否存在mysql8镜像以及是否存在已运行的mysql实例docker run -d -p 3306:3306 --privilegedtrue -v 【宿主机日…

L03_Redis知识图谱

这些知识点你都掌握了吗?大家可以对着问题看下自己掌握程度如何?对于没掌握的知识点,大家自行网上搜索,都会有对应答案,本文不做知识点详细说明,只做简要文字或图示引导。 Redis 全景图 Redis 知识全景图都包括什么呢?简单来说,就是“两大维度,三大主线”。 Redis …

MySQL连接IDEA(Java Web)保姆级教程

第一步&#xff1a;新建项目(File)->Project 第二步&#xff1a;New Project(JDK最好设置1.8版本与数据库适配&#xff0c;详细适配网请到MySQL官网查询MySQL :: MySQL 8.3 Reference Manual :: Search Results) 第三步&#xff1a;点中MySQLTest(项目名)并连续双击shift键-…

昇思25天学习打卡营第2天|数据集Dataset

学习目标&#xff1a;熟练掌握mindspore.dataset mindspore.dataset中有常用的视觉、文本、音频开源数据集供下载&#xff0c;点赞、关注收藏哦 了解mindspore.dataset mindspore.dataset应用实践 拓展自定义数据集 昇思平台学习时间记录: 一、关于mindspore.dataset minds…

【STM32】在标准库中使用定时器

1.TIM简介 STM32F407系列控制器有2个高级控制定时器、10个通用定时器和2个基本定时器。通常情况下&#xff0c;先看定时器挂在哪个总线上APB1或者APB2&#xff0c;然后定时器时钟需要在此基础上乘以2。 2.标准库实现定时中断 #ifndef __BSP_TIMER_H #define __BSP_TIMER_H#if…

.[emcrypts@tutanota.de].mkp勒索病毒新变种该如何应对?

引言 在数字化时代&#xff0c;随着信息技术的迅猛发展&#xff0c;网络安全问题日益凸显。其中&#xff0c;勒索病毒作为一种极具破坏力的恶意软件&#xff0c;给个人和企业带来了巨大的经济损失和数据安全风险。近期&#xff0c;一种名为“.mkp勒索病毒”的新型威胁开始在网络…

多线程引发的安全问题

前言&#x1f440;~ 上一章我们介绍了线程的一些基础知识点&#xff0c;例如创建线程、查看线程、中断线程、等待线程等知识点&#xff0c;今天我们讲解多线程下引发的安全问题 线程安全&#xff08;最复杂也最重要&#xff09; 产生线程安全问题的原因 锁&#xff08;重要…

在 Python 中创建列表时,应该写 `[]` 还是 `list()`?

在 Python 中&#xff0c;创建列表有两种写法&#xff1a; # 写法一&#xff1a;使用一对方括号 list_1 []# 写法二&#xff1a;调用 list() list_2 list() 那么哪种写法更好呢&#xff1f; 单从写法上来看&#xff0c;[] 要比 list() 简洁&#xff0c;那在性能和功能方面…

江科大笔记—读写内部闪存FLASH读取芯片ID

读写内部闪存FLASH 右下角是OLED&#xff0c;然后左上角在PB1和PB11两个引脚&#xff0c;插上两个按键用于控制。下一个代码读取芯片ID&#xff0c;这个也是接上一个OLED&#xff0c;能显示测试数据就可以了。 STM32-STLINK Utility 本节的代码调试&#xff0c;使用辅助软件…

[机缘参悟-200] - 对自然、人性、人生、人心、人际、企业、社会、宇宙全面系统的感悟 - 全图解

对自然、人性、人生、人心、人际、企业、社会、宇宙进行全面系统的感悟&#xff0c;是一个极其深邃且复杂的主题。以下是对这些领域的简要感悟&#xff1a; 自然&#xff1a; 自然是人类生存的根基&#xff0c;它充满了无尽的奥秘和美丽。自然界的平衡和循环规律&#xff0c;教…