Linux内核源代码情景分析-文件的打开
打开文件的系统调用是open(),在内核中通过sys_open()实现,假设filename是"/usr/local/hello.c",且假设这个文件已经存在,代码如下:
asmlinkage long sys_open(const char * filename, int flags, int mode) { char * tmp; int fd, error; #if BITS_PER_LONG != 32 flags |= O_LARGEFILE; #endif tmp = getname(filename);//从用户空间把文件的路径名拷贝到系统空间 fd = PTR_ERR(tmp); if (!IS_ERR(tmp)) { fd = get_unused_fd();//从当前进程的"打开文件表"中找到一个空闲的表项,该表项的下标即为"打开文件号" if (fd >= 0) { struct file *f = filp_open(tmp, flags, mode);//获得一个关联文件的file结构 error = PTR_ERR(f); if (IS_ERR(f)) goto out_error; fd_install(fd, f);//将新建的file数据结构的指针"安装"到当前进程的file_struct结构中 } out: putname(tmp); } return fd;//最后返回文件号 out_error: put_unused_fd(fd); fd = error; goto out; }
int get_unused_fd(void) { struct files_struct * files = current->files; int fd, error; error = -EMFILE; write_lock(&files->file_lock); repeat: fd = find_next_zero_bit(files->open_fds, files->max_fdset, files->next_fd);//在open_fds中,找到空闲打开文件号 /* * N.B. For clone tasks sharing a files structure, this test * will limit the total number of files that can be opened. */ if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur) goto out; /* Do we need to expand the fdset array? */ if (fd >= files->max_fdset) {//如果位图容量不够,则扩展 error = expand_fdset(files, fd); if (!error) { error = -EMFILE; goto repeat; } goto out; } /* * Check whether we need to expand the fd array. */ if (fd >= files->max_fds) {//如果file结构指针数组的容量不够,则扩展 error = expand_fd_array(files, fd); if (!error) { error = -EMFILE; goto repeat; } goto out; } FD_SET(fd, files->open_fds);//置位,下次就找不到了 FD_CLR(fd, files->close_on_exec); files->next_fd = fd + 1;//下一个打开文件号加1 #if 1 /* Sanity check */ if (files->fd[fd] != NULL) { printk("get_unused_fd: slot %d not NULL!\n", fd); files->fd[fd] = NULL; } #endif error = fd; out: write_unlock(&files->file_lock); return error; }
struct files_struct { atomic_t count; rwlock_t file_lock; int max_fds; //当前file结构指针数组的容量 int max_fdset;//位图的容量 int next_fd; //下个打开文件号 struct file ** fd; //指向了fd_array fd_set *close_on_exec; //指向了close_on_exec_init fd_set *open_fds; //指向了open_fds_init fd_set close_on_exec_init; fd_set open_fds_init; struct file * fd_array[NR_OPEN_DEFAULT]; };
struct file { struct list_head f_list; struct dentry *f_dentry;//指向文件的dentry结构的指针f_dentry struct vfsmount *f_vfsmnt;//指向将文件所在设备安装在文件系统中的vfsmnt结构的指针 struct file_operations *f_op; atomic_t f_count; unsigned int f_flags; mode_t f_mode; loff_t f_pos;//当前的读写位置 unsigned long f_reada, f_ramax, f_raend, f_ralen, f_rawin; struct fown_struct f_owner; unsigned int f_uid, f_gid; int f_error; unsigned long f_version; /* needed for tty driver, and maybe others */ void *private_data; };file_open,代码如下:
/*
 * filp_open - resolve a path name and return an opened struct file for it.
 *
 * @filename: path name to open
 * @flags:    open flags as supplied by the caller
 * @mode:     creation mode, handed to open_namei()
 *
 * Returns the struct file produced by dentry_open(), or ERR_PTR(error)
 * when open_namei() fails.
 */
struct file *filp_open(const char * filename, int flags, int mode)
{
	struct nameidata nd;
	int walk_flags = flags;
	int err;

	/*
	 * Adjust the flags handed to the name lookup.
	 * NOTE(review): the +1 presumably turns the access mode into separate
	 * read/write bits (open_namei() tests "flag & 2" for write) - confirm.
	 */
	if ((walk_flags + 1) & O_ACCMODE)
		walk_flags += 1;
	/* A truncating open is treated as needing bit 2 as well. */
	if (walk_flags & O_TRUNC)
		walk_flags |= 2;

	/* Resolve the path; on success nd.dentry and nd.mnt are filled in. */
	err = open_namei(filename, walk_flags, mode, &nd);
	if (err)
		return ERR_PTR(err);

	/* Build the struct file from the dentry, using the caller's flags. */
	return dentry_open(nd.dentry, nd.mnt, flags);
}
int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) { int acc_mode, error = 0; struct inode *inode; struct dentry *dentry; struct dentry *dir; int count = 0; acc_mode = ACC_MODE(flag); /* * The simplest case - just a plain lookup. */ if (!(flag & O_CREAT)) {//假设flag为O_CREATE,如果文件不存在就创建 if (path_init(pathname, lookup_flags(flag), nd)) error = path_walk(pathname, nd); if (error) return error; dentry = nd->dentry; goto ok; } /* * Create - we need to know the parent. */ if (path_init(pathname, LOOKUP_PARENT, nd)) error = path_walk(pathname, nd);//找到父节点 if (error) return error; /* * We have the parent and last component. First of all, check * that we are not asked to creat(2) an obvious directory - that * will not do. */ error = -EISDIR; if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])//虽然nd->dentry保存的是父节点的dentry结构,而nd->last保存的是最后一个节点的名字,nd->last_type保存的是最后一个节点的类型;这里确保last_type是LAST_NORM,且last节点名必须以/0结尾 goto exit; dir = nd->dentry; down(&dir->d_inode->i_sem); dentry = lookup_hash(&nd->last, nd->dentry);//寻找最后一个节点的dentry结构 do_last: error = PTR_ERR(dentry); if (IS_ERR(dentry)) { up(&dir->d_inode->i_sem); goto exit; } /* Negative dentry, just create the file */ if (!dentry->d_inode) {//我们假设最后一个节点存在,也就是inode结构存在 error = vfs_create(dir->d_inode, dentry, mode); up(&dir->d_inode->i_sem); dput(nd->dentry); nd->dentry = dentry; if (error) goto exit; /* Don‘t check for write permission, don‘t truncate */ acc_mode = 0; flag &= ~O_TRUNC; goto ok; } /* * It already exists. 
*/ up(&dir->d_inode->i_sem); error = -EEXIST; if (flag & O_EXCL) goto exit_dput; if (d_mountpoint(dentry)) {//是否是挂载点 error = -ELOOP; if (flag & O_NOFOLLOW) goto exit_dput; do __follow_down(&nd->mnt,&dentry); while(d_mountpoint(dentry)); } error = -ENOENT; if (!dentry->d_inode) goto exit_dput; if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link) goto do_link; dput(nd->dentry); nd->dentry = dentry;//最后一个节点的dentry结构保存在nd->dentry中 error = -EISDIR; if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) goto exit; ok: error = -ENOENT;//往下暂不关心 inode = dentry->d_inode; if (!inode) goto exit; error = -ELOOP; if (S_ISLNK(inode->i_mode)) goto exit; error = -EISDIR; if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) goto exit; error = permission(inode,acc_mode); if (error) goto exit; /* * FIFO‘s, sockets and device files are special: they don‘t * actually live on the filesystem itself, and as such you * can write to them even if the filesystem is read-only. */ if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { flag &= ~O_TRUNC; } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { error = -EACCES; if (IS_NODEV(inode)) goto exit; flag &= ~O_TRUNC; } else { error = -EROFS; if (IS_RDONLY(inode) && (flag & 2)) goto exit; } /* * An append-only file must be opened in append mode for writing. */ error = -EPERM; if (IS_APPEND(inode)) { if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) goto exit; if (flag & O_TRUNC) goto exit; } /* * Ensure there are no outstanding leases on the file. */ error = get_lease(inode, flag); if (error) goto exit; if (flag & O_TRUNC) { error = get_write_access(inode); if (error) goto exit; /* * Refuse to truncate files with mandatory locks held on them. 
*/ error = locks_verify_locked(inode); if (!error) { DQUOT_INIT(inode); error = do_truncate(dentry, 0); } put_write_access(inode); if (error) goto exit; } else if (flag & FMODE_WRITE) DQUOT_INIT(inode); return 0; exit_dput: dput(dentry); exit: path_release(nd); return error; do_link: error = -ELOOP; if (flag & O_NOFOLLOW) goto exit_dput; /* * This is subtle. Instead of calling do_follow_link() we do the * thing by hands. The reason is that this way we have zero link_count * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT. * After that we have the parent and last component, i.e. * we are in the same situation as after the first path_walk(). * Well, almost - if the last component is normal we get its copy * stored in nd->last.name and we will have to putname() it when we * are done. Procfs-like symlinks just set LAST_BIND. */ UPDATE_ATIME(dentry->d_inode); error = dentry->d_inode->i_op->follow_link(dentry, nd); dput(dentry); if (error) return error; if (nd->last_type == LAST_BIND) { dentry = nd->dentry; goto ok; } error = -EISDIR; if (nd->last_type != LAST_NORM) goto exit; if (nd->last.name[nd->last.len]) { putname(nd->last.name); goto exit; } if (count++==32) { dentry = nd->dentry; putname(nd->last.name); goto ok; } dir = nd->dentry; down(&dir->d_inode->i_sem); dentry = lookup_hash(&nd->last, nd->dentry); putname(nd->last.name); goto do_last; }
返回filp_open,继续执行dentry_open()来填充file结构,代码如下:
struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) { struct file * f; struct inode *inode; int error; error = -ENFILE; f = get_empty_filp();//分配一个空闲的file数据结构 if (!f) goto cleanup_dentry; f->f_flags = flags; f->f_mode = (flags+1) & O_ACCMODE; inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { error = get_write_access(inode); if (error) goto cleanup_file; } f->f_dentry = dentry;//该节点的dentry结构 f->f_vfsmnt = mnt;//该节点的vfsmount结构 f->f_pos = 0; f->f_reada = 0; f->f_op = fops_get(inode->i_fop);//f->f_op被赋值为inode_i_fop if (inode->i_sb) file_move(f, &inode->i_sb->s_files);//将其从中间队列脱链而挂入该文件所在设备的super_block结构中的file结构队列s_files if (f->f_op && f->f_op->open) { error = f->f_op->open(inode,f); if (error) goto cleanup_all; } f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); return f; cleanup_all: fops_put(f->f_op); if (f->f_mode & FMODE_WRITE) put_write_access(inode); f->f_dentry = NULL; f->f_vfsmnt = NULL; cleanup_file: put_filp(f); cleanup_dentry: dput(dentry); mntput(mnt); return ERR_PTR(error); }get_empty_filp,分配一个空闲的file数据结构。内核中有一个空闲file结构的队列free_list,需要file结构时就从该队列中摘下一个,并将其暂时挂入一个中间队列anon_list。在确认了对该文件可以进行写操作以后,就对这个空闲file结构进行初始化。然后通过file_move()将其从中间队列脱链而挂入该文件所在设备的super_block结构中的file结构队列s_files。
struct file * get_empty_filp(void) { static int old_max = 0; struct file * f; file_list_lock(); if (files_stat.nr_free_files > NR_RESERVED_FILES) { used_one: f = list_entry(free_list.next, struct file, f_list); list_del(&f->f_list);//内核中有一个空闲file结构的队列free_list,需要file结构时就从该队列中摘下一个 files_stat.nr_free_files--; new_one: memset(f, 0, sizeof(*f)); atomic_set(&f->f_count,1); f->f_version = ++event; f->f_uid = current->fsuid; f->f_gid = current->fsgid; list_add(&f->f_list, &anon_list);//并将其暂时挂入一个中间队列anon_list file_list_unlock(); return f; } /* * Use a reserved one if we‘re the superuser */ if (files_stat.nr_free_files && !current->euid) goto used_one; /* * Allocate a new one if we‘re below the limit. */ if (files_stat.nr_files < files_stat.max_files) { file_list_unlock(); f = kmem_cache_alloc(filp_cachep, SLAB_KERNEL); file_list_lock(); if (f) { files_stat.nr_files++; goto new_one; } /* Big problems... */ printk("VFS: filp allocation failed\n"); } else if (files_stat.max_files > old_max) { printk("VFS: file-max limit %d reached\n", files_stat.max_files); old_max = files_stat.max_files; } file_list_unlock(); return NULL; }至此,filp_open分析完成,返回到sys_open,执行fd_install,将新建的file数据结构的指针"安装"到当前进程的file_struct结构中,代码如下:
/*
 * fd_install - store a struct file pointer in the current process's
 * open-file table.
 *
 * @fd:   descriptor number whose slot is filled
 * @file: struct file to install
 *
 * The slot must be empty; a non-empty slot triggers BUG(). The table's
 * file_lock is write-held while the slot is checked and written.
 */
static inline void fd_install(unsigned int fd, struct file * file)
{
	struct files_struct *table = current->files;
	struct file **slot;

	write_lock(&table->file_lock);
	/* Locate the slot only after the lock is held. */
	slot = &table->fd[fd];
	if (*slot)
		BUG();
	*slot = file;
	write_unlock(&table->file_lock);
}
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。