生成块设备
我们以虚拟文件的接口,来看这个框架;因为这是从从应用层到内核的必经之路;使用vfs_mknod来生成块设备文件,并初始化fops
mknod
do_mknodat
vfs_mknod
shmem_mknod
shmem_get_inode
init_special_inode
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
{
inode->i_mode = mode;
if (S_ISCHR(mode)) {
inode->i_fop = &def_chr_fops;
inode->i_rdev = rdev;
} else if (S_ISBLK(mode)) {
if (IS_ENABLED(CONFIG_BLOCK))
inode->i_fop = &def_blk_fops;
inode->i_rdev = rdev;
} else if (S_ISFIFO(mode))
inode->i_fop = &pipefifo_fops;
else if (S_ISSOCK(mode))
; /* leave it no_open_fops */
else
printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
" inode %s:%lu\n", mode, inode->i_sb->s_id,
inode->i_ino);
}
EXPORT_SYMBOL(init_special_inode);
const struct file_operations def_blk_fops = {
.open = blkdev_open,
.release = blkdev_release,
.llseek = blkdev_llseek,
.read_iter = blkdev_read_iter,
.write_iter = blkdev_write_iter,
.iopoll = iocb_bio_iopoll,
.mmap = blkdev_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = blkdev_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_blkdev_ioctl,
#endif
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = blkdev_fallocate,
};
打开块设备
open系统调用打开字符设备文件时,会调用do_dentry_open,里面的f->f_op = fops_get(inode->i_fop);就是把上面init_special_inode里的inode->i_fop = &def_blk_fops赋给f->f_op;其中的open为blkdev_open;会调用块设备驱动的open
open
do_sys_open
do_sys_openat2
do_filp_open
path_openat
vfs_open
do_dentry_open
static int do_dentry_open(struct file *f,
struct inode *inode,
int (*open)(struct inode *, struct file *))
{
static const struct file_operations empty_fops = {};
int error;
path_get(&f->f_path);
f->f_inode = inode;
f->f_mapping = inode->i_mapping;
/* Ensure that we skip any errors that predate opening of the file */
f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
if (unlikely(f->f_flags & O_PATH)) {
f->f_mode = FMODE_PATH | FMODE_OPENED;
f->f_op = &empty_fops;
return 0;
}
/* Any file opened for execve()/uselib() has to be a regular file. */
if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) {
error = -EACCES;
goto cleanup_file;
}
if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
error = get_write_access(inode);
if (unlikely(error))
goto cleanup_file;
error = __mnt_want_write(f->f_path.mnt);
if (unlikely(error)) {
put_write_access(inode);
goto cleanup_file;
}
f->f_mode |= FMODE_WRITER;
}
/* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
f->f_mode |= FMODE_ATOMIC_POS;
f->f_op = fops_get(inode->i_fop);
if (WARN_ON(!f->f_op)) {
error = -ENODEV;
goto cleanup_all;
}
error = security_file_open(f);
if (error)
goto cleanup_all;
error = break_lease(locks_inode(f), f->f_flags);
if (error)
goto cleanup_all;
/* normally all 3 are set; ->open() can clear them if needed */
f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
if (!open)
open = f->f_op->open;
if (open) {
error = open(inode, f);
if (error)
goto cleanup_all;
}
f->f_mode |= FMODE_OPENED;
if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_inc(inode);
if ((f->f_mode & FMODE_READ) &&
likely(f->f_op->read || f->f_op->read_iter))
f->f_mode |= FMODE_CAN_READ;
if ((f->f_mode & FMODE_WRITE) &&
likely(f->f_op->write || f->f_op->write_iter))
f->f_mode |= FMODE_CAN_WRITE;
f->f_write_hint = WRITE_LIFE_NOT_SET;
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
/* NB: we're sure to have correct a_ops only after f_op->open */
if (f->f_flags & O_DIRECT) {
if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
return -EINVAL;
}
/*
* XXX: Huge page cache doesn't support writing yet. Drop all page
* cache for this file before processing writes.
*/
if ((f->f_mode & FMODE_WRITE) && filemap_nr_thps(inode->i_mapping))
truncate_pagecache(inode, 0);
return 0;
cleanup_all:
if (WARN_ON_ONCE(error > 0))
error = -EINVAL;
fops_put(f->f_op);
if (f->f_mode & FMODE_WRITER) {
put_write_access(inode);
__mnt_drop_write(f->f_path.mnt);
}
cleanup_file:
path_put(&f->f_path);
f->f_path.mnt = NULL;
f->f_path.dentry = NULL;
f->f_inode = NULL;
return error;
}
读块设备
不管有没有文件系统,应用层都是通过虚拟文件系统的接口来到内核的,通过open后;vfs_read里的file->f_op->read_iter就等于def_blk_fops的read_iter成员
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
ssize_t ret;
if (!(file->f_mode & FMODE_READ))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_READ))
return -EINVAL;
if (unlikely(!access_ok(buf, count)))
return -EFAULT;
ret = rw_verify_area(READ, file, pos, count);
if (ret)
return ret;
if (count > MAX_RW_COUNT)
count = MAX_RW_COUNT;
if (file->f_op->read)
ret = file->f_op->read(file, buf, count, pos);
else if (file->f_op->read_iter)
ret = new_sync_read(file, buf, count, pos);
else
ret = -EINVAL;
if (ret > 0) {
fsnotify_access(file);
add_rchar(current, ret);
}
inc_syscr(current);
return ret;
}
简化的read框图如下
写块设备
其中file->f_op->write_iter就等于def_blk_fops的write_iter成员
ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
ssize_t ret;
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
if (unlikely(!access_ok(buf, count)))
return -EFAULT;
ret = rw_verify_area(WRITE, file, pos, count);
if (ret)
return ret;
if (count > MAX_RW_COUNT)
count = MAX_RW_COUNT;
file_start_write(file);
if (file->f_op->write)
ret = file->f_op->write(file, buf, count, pos);
else if (file->f_op->write_iter)
ret = new_sync_write(file, buf, count, pos);
else
ret = -EINVAL;
if (ret > 0) {
fsnotify_modify(file);
add_wchar(current, ret);
}
inc_syscw(current);
file_end_write(file);
return ret;
}
简化的write框图如下
内核实例
比如插上sd卡,生成/dev/mmcblk0p1;然后usb的U盘功能的bind去打开这个块设备,就能在windows上枚举出一个U盘
drivers/usb/gadget/function/storage_common.c里面就会去,判断下/dev/mmcblk0p1这个设备的read/write相关的接口有没有
static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
{
int ro;
struct file *filp = NULL;
int rc = -EINVAL;
struct inode *inode = NULL;
loff_t size;
loff_t num_sectors;
loff_t min_sectors;
unsigned int blkbits;
unsigned int blksize;
/* R/W if we can, R/O if we must */
ro = curlun->initially_ro;
if (!ro) {
filp = filp_open(filename, O_RDWR | O_LARGEFILE, 0);
if (PTR_ERR(filp) == -EROFS || PTR_ERR(filp) == -EACCES)
ro = 1;
}
if (ro)
filp = filp_open(filename, O_RDONLY | O_LARGEFILE, 0);
if (IS_ERR(filp)) {
LINFO(curlun, "unable to open backing file: %s\n", filename);
return PTR_ERR(filp);
}
if (!(filp->f_mode & FMODE_WRITE))
ro = 1;
inode = file_inode(filp);
if ((!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) {
LINFO(curlun, "invalid file type: %s\n", filename);
goto out;
}
/*
* If we can't read the file, it's no good.
* If we can't write the file, use it read-only.
*/
if (!(filp->f_op->read || filp->f_op->read_iter)) {
LINFO(curlun, "file not readable: %s\n", filename);
goto out;
}
if (!(filp->f_op->write || filp->f_op->write_iter))
ro = 1;
size = i_size_read(inode->i_mapping->host);
if (size < 0) {
LINFO(curlun, "unable to find file size: %s\n", filename);
rc = (int) size;
goto out;
}
if (curlun->cdrom) {
blksize = 2048;
blkbits = 11;
} else if (inode->i_bdev) {
blksize = bdev_logical_block_size(inode->i_bdev);
blkbits = blksize_bits(blksize);
} else {
blksize = 512;
blkbits = 9;
}
num_sectors = size >> blkbits; /* File size in logic-block-size blocks */
min_sectors = 1;
if (curlun->cdrom) {
min_sectors = 0;
if (num_sectors >= 256*60*75) {
num_sectors = 256*60*75 - 1;
LINFO(curlun, "file too big: %s\n", filename);
LINFO(curlun, "using only first %d blocks\n",
(int) num_sectors);
}
}
if (num_sectors < min_sectors) {
LINFO(curlun, "file too small: %s\n", filename);
rc = -ETOOSMALL;
goto out;
}
if (fsg_lun_is_open(curlun))
fsg_lun_close(curlun);
curlun->blksize = blksize;
curlun->blkbits = blkbits;
curlun->ro = ro;
curlun->filp = filp;
curlun->file_length = size;
curlun->num_sectors = num_sectors;
LDBG(curlun, "open backing file: %s\n", filename);
return 0;
out:
fput(filp);
return rc;
}
根据最上面的def_blk_fops;5.4.195的内核是没有read成员的,只有read_iter;所以就会打印“file not readable”;需做如下修改