Linux内核源代码情景分析-文件的打开

浏览数：26 / 时间：2015年06月20日

打开文件的系统调用是open()，在内核中通过sys_open()实现，假设filename是"/usr/local/hello.c"，且假设这个文件已经存在，代码如下：

asmlinkage long sys_open(const char * filename, int flags, int mode)
{
	char * tmp;
	int fd, error;

#if BITS_PER_LONG != 32
	flags |= O_LARGEFILE;
#endif
	tmp = getname(filename);//从用户空间把文件的路径名拷贝到系统空间
	fd = PTR_ERR(tmp);
	if (!IS_ERR(tmp)) {
		fd = get_unused_fd();//从当前进程的"打开文件表"中找到一个空闲的表项，该表项的下标即为"打开文件号"
		if (fd >= 0) {
			struct file *f = filp_open(tmp, flags, mode);//获得一个关联文件的file结构
			error = PTR_ERR(f);
			if (IS_ERR(f))
				goto out_error;
			fd_install(fd, f);//将新建的file数据结构的指针"安装"到当前进程的file_struct结构中
		}
out:
		putname(tmp);
	}
	return fd;//最后返回文件号

out_error:
	put_unused_fd(fd);
	fd = error;
	goto out;
}

get_unused_fd，从当前进程的"打开文件表"中找到一个空闲的表项，该表项的下标即为"打开文件号"，代码如下：

int get_unused_fd(void)
{
	struct files_struct * files = current->files;
	int fd, error;

  	error = -EMFILE;
	write_lock(&files->file_lock);

repeat:
 	fd = find_next_zero_bit(files->open_fds, 
				files->max_fdset, 
				files->next_fd);//在open_fds中，找到空闲打开文件号

	/*
	 * N.B. For clone tasks sharing a files structure, this test
	 * will limit the total number of files that can be opened.
	 */
	if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

	/* Do we need to expand the fdset array? */
	if (fd >= files->max_fdset) {//如果位图容量不够，则扩展
		error = expand_fdset(files, fd);
		if (!error) {
			error = -EMFILE;
			goto repeat;
		}
		goto out;
	}
	
	/* 
	 * Check whether we need to expand the fd array.
	 */
	if (fd >= files->max_fds) {//如果file结构指针数组的容量不够，则扩展
		error = expand_fd_array(files, fd);
		if (!error) {
			error = -EMFILE;
			goto repeat;
		}
		goto out;
	}

	FD_SET(fd, files->open_fds);//置位，下次就找不到了
	FD_CLR(fd, files->close_on_exec);
	files->next_fd = fd + 1;//下一个打开文件号加1 
#if 1
	/* Sanity check */
	if (files->fd[fd] != NULL) {
		printk("get_unused_fd: slot %d not NULL!\n", fd);
		files->fd[fd] = NULL;
	}
#endif
	error = fd;

out:
	write_unlock(&files->file_lock);
	return error;
}

struct files_struct {
	atomic_t count;
	rwlock_t file_lock;
	int max_fds; //当前file结构指针数组的容量
	int max_fdset;//位图的容量
	int next_fd; //下个打开文件号
	struct file ** fd;	//指向了fd_array
	fd_set *close_on_exec; //指向了close_on_exec_init
	fd_set *open_fds;      //指向了open_fds_init
	fd_set close_on_exec_init;
	fd_set open_fds_init;
	struct file * fd_array[NR_OPEN_DEFAULT];
};

获得了打开文件号以后，filp_open来获得一个file结构，首先列出file结构如下：

struct file {
	struct list_head	f_list;
	struct dentry		*f_dentry;//指向文件的dentry结构的指针f_dentry
	struct vfsmount         *f_vfsmnt;//指向将文件所在设备安装在文件系统中的vfsmnt结构的指针
	struct file_operations	*f_op;
	atomic_t		f_count;
	unsigned int 		f_flags;
	mode_t			f_mode;
	loff_t			f_pos;//当前的读写位置
	unsigned long 		f_reada, f_ramax, f_raend, f_ralen, f_rawin;
	struct fown_struct	f_owner;
	unsigned int		f_uid, f_gid;
	int			f_error;

	unsigned long		f_version;

	/* needed for tty driver, and maybe others */
	void			*private_data;
};

file_open，代码如下：

struct file *filp_open(const char * filename, int flags, int mode)
{
	int namei_flags, error;
	struct nameidata nd;

	namei_flags = flags;
	if ((namei_flags+1) & O_ACCMODE)
		namei_flags++;
	if (namei_flags & O_TRUNC)
		namei_flags |= 2;

	error = open_namei(filename, namei_flags, mode, &nd);//获得nd->dentry结构
	if (!error)
		return dentry_open(nd.dentry, nd.mnt, flags);//根据nd->dentry结构填充file结构

	return ERR_PTR(error);
}

int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
{
	int acc_mode, error = 0;
	struct inode *inode;
	struct dentry *dentry;
	struct dentry *dir;
	int count = 0;

	acc_mode = ACC_MODE(flag);

	/*
	 * The simplest case - just a plain lookup.
	 */
	if (!(flag & O_CREAT)) {//假设flag为O_CREATE,如果文件不存在就创建
		if (path_init(pathname, lookup_flags(flag), nd))
			error = path_walk(pathname, nd);
		if (error)
			return error;
		dentry = nd->dentry;
		goto ok;
	}

	/*
	 * Create - we need to know the parent.
	 */
	if (path_init(pathname, LOOKUP_PARENT, nd))
		error = path_walk(pathname, nd);//找到父节点
	if (error)
		return error;

	/*
	 * We have the parent and last component. First of all, check
	 * that we are not asked to creat(2) an obvious directory - that
	 * will not do.
	 */
	error = -EISDIR;
	if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])//虽然nd->dentry保存的是父节点的dentry结构，而nd->last保存的是最后一个节点的名字，nd->last_type保存的是最后一个节点的类型；这里确保last_type是LAST_NORM，且last节点名必须以/0结尾
		goto exit;

	dir = nd->dentry;
	down(&dir->d_inode->i_sem);
	dentry = lookup_hash(&nd->last, nd->dentry);//寻找最后一个节点的dentry结构

do_last:
	error = PTR_ERR(dentry);
	if (IS_ERR(dentry)) {
		up(&dir->d_inode->i_sem);
		goto exit;
	}

	/* Negative dentry, just create the file */
	if (!dentry->d_inode) {//我们假设最后一个节点存在，也就是inode结构存在
		error = vfs_create(dir->d_inode, dentry, mode);
		up(&dir->d_inode->i_sem);
		dput(nd->dentry);
		nd->dentry = dentry;
		if (error)
			goto exit;
		/* Don‘t check for write permission, don‘t truncate */
		acc_mode = 0;
		flag &= ~O_TRUNC;
		goto ok;
	}

	/*
	 * It already exists.
	 */
	up(&dir->d_inode->i_sem);

	error = -EEXIST;
	if (flag & O_EXCL)
		goto exit_dput;

	if (d_mountpoint(dentry)) {//是否是挂载点
		error = -ELOOP;
		if (flag & O_NOFOLLOW)
			goto exit_dput;
		do __follow_down(&nd->mnt,&dentry); while(d_mountpoint(dentry));
	}
	error = -ENOENT;
	if (!dentry->d_inode)
		goto exit_dput;
	if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
		goto do_link;

	dput(nd->dentry);
	nd->dentry = dentry;//最后一个节点的dentry结构保存在nd->dentry中
	error = -EISDIR;
	if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
		goto exit;
ok:
	error = -ENOENT;//往下暂不关心
	inode = dentry->d_inode;
	if (!inode)
		goto exit;

	error = -ELOOP;
	if (S_ISLNK(inode->i_mode))
		goto exit;
	
	error = -EISDIR;
	if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
		goto exit;

	error = permission(inode,acc_mode);
	if (error)
		goto exit;

	/*
	 * FIFO‘s, sockets and device files are special: they don‘t
	 * actually live on the filesystem itself, and as such you
	 * can write to them even if the filesystem is read-only.
	 */
	if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
	    	flag &= ~O_TRUNC;
	} else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
		error = -EACCES;
		if (IS_NODEV(inode))
			goto exit;

		flag &= ~O_TRUNC;
	} else {
		error = -EROFS;
		if (IS_RDONLY(inode) && (flag & 2))
			goto exit;
	}
	/*
	 * An append-only file must be opened in append mode for writing.
	 */
	error = -EPERM;
	if (IS_APPEND(inode)) {
		if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
			goto exit;
		if (flag & O_TRUNC)
			goto exit;
	}

	/*
	 * Ensure there are no outstanding leases on the file.
	 */
	error = get_lease(inode, flag);
	if (error)
		goto exit;

	if (flag & O_TRUNC) {
		error = get_write_access(inode);
		if (error)
			goto exit;

		/*
		 * Refuse to truncate files with mandatory locks held on them.
		 */
		error = locks_verify_locked(inode);
		if (!error) {
			DQUOT_INIT(inode);
			
			error = do_truncate(dentry, 0);
		}
		put_write_access(inode);
		if (error)
			goto exit;
	} else
		if (flag & FMODE_WRITE)
			DQUOT_INIT(inode);

	return 0;

exit_dput:
	dput(dentry);
exit:
	path_release(nd);
	return error;

do_link:
	error = -ELOOP;
	if (flag & O_NOFOLLOW)
		goto exit_dput;
	/*
	 * This is subtle. Instead of calling do_follow_link() we do the
	 * thing by hands. The reason is that this way we have zero link_count
	 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
	 * After that we have the parent and last component, i.e.
	 * we are in the same situation as after the first path_walk().
	 * Well, almost - if the last component is normal we get its copy
	 * stored in nd->last.name and we will have to putname() it when we
	 * are done. Procfs-like symlinks just set LAST_BIND.
	 */
	UPDATE_ATIME(dentry->d_inode);
	error = dentry->d_inode->i_op->follow_link(dentry, nd);
	dput(dentry);
	if (error)
		return error;
	if (nd->last_type == LAST_BIND) {
		dentry = nd->dentry;
		goto ok;
	}
	error = -EISDIR;
	if (nd->last_type != LAST_NORM)
		goto exit;
	if (nd->last.name[nd->last.len]) {
		putname(nd->last.name);
		goto exit;
	}
	if (count++==32) {
		dentry = nd->dentry;
		putname(nd->last.name);
		goto ok;
	}
	dir = nd->dentry;
	down(&dir->d_inode->i_sem);
	dentry = lookup_hash(&nd->last, nd->dentry);
	putname(nd->last.name);
	goto do_last;
}

返回file_open，继续执行dentry_open，来填充file结构，代码如下：

struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
{
	struct file * f;
	struct inode *inode;
	int error;

	error = -ENFILE;
	f = get_empty_filp();//分配一个空闲的file数据结构
	if (!f)
		goto cleanup_dentry;
	f->f_flags = flags;
	f->f_mode = (flags+1) & O_ACCMODE;
	inode = dentry->d_inode;
	if (f->f_mode & FMODE_WRITE) {
		error = get_write_access(inode);
		if (error)
			goto cleanup_file;
	}

	f->f_dentry = dentry;//该节点的dentry结构
	f->f_vfsmnt = mnt;//该节点的vfsmount结构
	f->f_pos = 0;
	f->f_reada = 0;
	f->f_op = fops_get(inode->i_fop);//f->f_op被赋值为inode_i_fop
	if (inode->i_sb)
		file_move(f, &inode->i_sb->s_files);//将其从中间队列脱链而挂入该文件所在设备的super_block结构中的file结构队列s_files
	if (f->f_op && f->f_op->open) {
		error = f->f_op->open(inode,f);
		if (error)
			goto cleanup_all;
	}
	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

	return f;

cleanup_all:
	fops_put(f->f_op);
	if (f->f_mode & FMODE_WRITE)
		put_write_access(inode);
	f->f_dentry = NULL;
	f->f_vfsmnt = NULL;
cleanup_file:
	put_filp(f);
cleanup_dentry:
	dput(dentry);
	mntput(mnt);
	return ERR_PTR(error);
}

get_empty_filp，分配一个空闲的file数据结构。内核中有一个空闲file结构的队列free_list，需要file结构时就从该队列中摘下一个，并将其暂时挂入一个中间队列anon_list。在确认了对该文件可以进行写操作以后，就对这个空闲file结构进行初始化。然后通过file_move()将其从中间队列脱链而挂入该文件所在设备的super_block结构中的file结构队列s_files。

struct file * get_empty_filp(void)
{
	static int old_max = 0;
	struct file * f;

	file_list_lock();
	if (files_stat.nr_free_files > NR_RESERVED_FILES) {
	used_one:
		f = list_entry(free_list.next, struct file, f_list);
		list_del(&f->f_list);//内核中有一个空闲file结构的队列free_list，需要file结构时就从该队列中摘下一个
		files_stat.nr_free_files--;
	new_one:
		memset(f, 0, sizeof(*f));
		atomic_set(&f->f_count,1);
		f->f_version = ++event;
		f->f_uid = current->fsuid;
		f->f_gid = current->fsgid;
		list_add(&f->f_list, &anon_list);//并将其暂时挂入一个中间队列anon_list
		file_list_unlock();
		return f;
	}
	/*
	 * Use a reserved one if we‘re the superuser
	 */
	if (files_stat.nr_free_files && !current->euid)
		goto used_one;
	/*
	 * Allocate a new one if we‘re below the limit.
	 */
	if (files_stat.nr_files < files_stat.max_files) {
		file_list_unlock();
		f = kmem_cache_alloc(filp_cachep, SLAB_KERNEL);
		file_list_lock();
		if (f) {
			files_stat.nr_files++;
			goto new_one;
		}
		/* Big problems... */
		printk("VFS: filp allocation failed\n");

	} else if (files_stat.max_files > old_max) {
		printk("VFS: file-max limit %d reached\n", files_stat.max_files);
		old_max = files_stat.max_files;
	}
	file_list_unlock();
	return NULL;
}

至此，filp_open分析完成，返回到sys_open，执行fd_install，将新建的file数据结构的指针"安装"到当前进程的file_struct结构中，代码如下：

static inline void fd_install(unsigned int fd, struct file * file)
{
	struct files_struct *files = current->files;
	
	write_lock(&files->file_lock);
	if (files->fd[fd])
		BUG();
	files->fd[fd] = file;
	write_unlock(&files->file_lock);
}

郑重声明：本站内容如果来自互联网及其他传播媒体，其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享，并不代表本站赞同其观点和对其真实性负责，也不构成任何其他建议。

Linux内核源代码情景分析-文件的打开