Linux内核源代码情景分析-特殊文件系统/proc-对/proc/self/cwd的访问

    继上篇文章Linux内核源代码情景分析-特殊文件系统/proc,我们对/proc/loadavg访问后,这篇文章是对/proc/self/cwd的访问。

int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
{
	char *tmp;
	int err;

	tmp = getname(name);//在系统空间分配一个页面,并从用户空间把文件名复制到这个页面
	err = PTR_ERR(tmp);
	if (!IS_ERR(tmp)) {
		err = 0;
		if (path_init(tmp, flags, nd))
			err = path_walk(tmp, nd);
		putname(tmp);
	}
	return err;
}
    name就为/proc/self/cwd,重要分析下path_walk函数,请参考Linux内核源代码情景分析-从路径名到目标节点

    第一次循环path_walk发现/proc是个安装节点而通过_follow_down找到了proc文件系统的根节点的dentry结构,nameidata结构中的指针dentry指向这个数据结构。、

    第二次循环搜索路径名中的下一个节点self,由于这个节点并不是路径名的最后一个节点,所以执行的代码如下:

dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);//在内存中寻找该节点业已建立的dentry结构  
        if (!dentry) {//如果没有找到  
            dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);//那么就要建立该节点的dentry结构  
            err = PTR_ERR(dentry);  
            if (IS_ERR(dentry))  
                break;  
        }  
    参考Linux内核源代码情景分析-特殊文件系统/proc,最终也要通过proc_root_lookup()调用proc_lookup(),试图为节点建立起其dentry结构和inode结构。可是由于/proc/self并没有一个固定的proc_dir_entry结构,所以对proc_lookup()的调用必然会失败,因而会进一步调用proc_pid_lookup(),代码如下:

static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry)
{
	if (dir->i_ino == PROC_ROOT_INO) { /* check for safety... */
		int nlink = proc_root.nlink;

		nlink += nr_threads;

		dir->i_nlink = nlink;
	}

	if (!proc_lookup(dir, dentry))///由于/proc/self并没有一个固定的proc_dir_entry结构,所以对proc_lookup()的调用必然会失败
		return NULL;
	
	return proc_pid_lookup(dir, dentry);//会调用这个函数
}
struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry)
{
	struct inode *inode;
	struct proc_dir_entry * de;
	int error;

	error = -ENOENT;
	inode = NULL;
	de = (struct proc_dir_entry *) dir->u.generic_ip;
	if (de) {//找不到/proc/self节点
		for (de = de->subdir; de ; de = de->next) {
			if (!de || !de->low_ino)
				continue;
			if (de->namelen != dentry->d_name.len)
				continue;
			if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
				int ino = de->low_ino;
				error = -EINVAL;
				inode = proc_get_inode(dir->i_sb, ino, de);
				break;
			}
		}
	}

	if (inode) {
		dentry->d_op = &proc_dentry_operations;
		d_add(dentry, inode);
		return NULL;
	}
	return ERR_PTR(error);//返回错误码
}

struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
{
	unsigned int pid, c;
	struct task_struct *task;
	const char *name;
	struct inode *inode;
	int len;

	pid = 0;
	name = dentry->d_name.name;
	len = dentry->d_name.len;
	if (len == 4 && !memcmp(name, "self", 4)) {//执行这里,name等于self
		inode = new_inode(dir->i_sb);
		if (!inode)
			return ERR_PTR(-ENOMEM);
		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
		inode->i_ino = fake_ino(0, PROC_PID_INO);
		inode->u.proc_i.file = NULL;
		inode->u.proc_i.task = NULL;
		inode->i_mode = S_IFLNK|S_IRWXUGO;
		inode->i_uid = inode->i_gid = 0;
		inode->i_size = 64;
		inode->i_op = &proc_self_inode_operations;
		d_add(dentry, inode);
		return NULL;//返回了
	}
	while (len-- > 0) {
		c = *name - ‘0‘;
		name++;
		if (c > 9)
			goto out;
		if (pid >= MAX_MULBY10)
			goto out;
		pid *= 10;
		pid += c;
		if (!pid)
			goto out;
	}

	read_lock(&tasklist_lock);
	task = find_task_by_pid(pid);
	if (task)
		get_task_struct(task);
	read_unlock(&tasklist_lock);
	if (!task)
		goto out;

	inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_INO);

	free_task_struct(task);

	if (!inode)
		goto out;
	inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
	inode->i_op = &proc_base_inode_operations;
	inode->i_fop = &proc_base_operations;
	inode->i_nlink = 3;
	inode->i_flags|=S_IMMUTABLE;

	dentry->d_op = &pid_base_dentry_operations;
	d_add(dentry, inode);
	return NULL;
out:
	return ERR_PTR(-ENOENT);
}
    其中proc_self_inode_operations结构定义如下:

static struct inode_operations proc_self_inode_operations = {
	readlink:	proc_self_readlink,
	follow_link:	proc_self_follow_link,
};
    还是第二轮循环,从proc_root_lookup返回到path_walk中以后,接着要检查和处理两件事,第一件是新找到的节点是否为安装点;第二件就是它是否是一个连接节点。这正是我们在这里所关心的,因为/proc/self就是个连接节点。继续看path_walk,代码如下:

if (inode->i_op->follow_link) {//看看这个指针是否为NULL,这个指针是在ext2_read_inode中设置的  
            err = do_follow_link(dentry, nd); 
static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)//参数dentry为proc/self节点的dentry结构
{
	int err;
	if (current->link_count >= 8)
		goto loop;
	current->link_count++;
	UPDATE_ATIME(dentry->d_inode);
	err = dentry->d_inode->i_op->follow_link(dentry, nd);//proc_self_follow_link
	current->link_count--;
	return err;
loop:
	path_release(nd);
	return -ELOOP;
}
    entry->d_inode->i_op->follow_link指向proc_self_follow_link,代码如下:

static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	char tmp[30];
	sprintf(tmp, "%d", current->pid);
	return vfs_follow_link(nd,tmp);
}	
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	return __vfs_follow_link(nd, link);
}
static inline int
__vfs_follow_link(struct nameidata *nd, const char *link)
{
	int res = 0;
	char *name;
	if (IS_ERR(link))
		goto fail;

	if (*link == ‘/‘) {
		path_release(nd);
		if (!walk_init_root(link, nd))
			/* weird __emul_prefix() stuff did it */
			goto out;
	}
	res = path_walk(link, nd);
out:
	if (current->link_count || res || nd->last_type!=LAST_NORM)
		return res;
	/*
	 * If it is an iterative symlinks resolution in open_namei() we
	 * have to copy the last component. And all that crap because of
	 * bloody create() on broken symlinks. Furrfu...
	 */
	name = __getname();
	if (IS_ERR(name))
		goto fail_name;
	strcpy(name, nd->last.name);
	nd->last.name = name;
	return 0;
fail_name:
	link = name;
fail:
	path_release(nd);
	return PTR_ERR(link);
}
    在__vfs_follow_link()中会调用path_walk()来寻找连接的目标节点,所以又会调用其父节点/proc的lookup函数,即proc_root_lookup(),不同的只是这次寻找的不是"self",而是当前进程的pid字符串。

struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
{
	unsigned int pid, c;
	struct task_struct *task;
	const char *name;
	struct inode *inode;
	int len;

	pid = 0;
	name = dentry->d_name.name;
	len = dentry->d_name.len;
	if (len == 4 && !memcmp(name, "self", 4)) {//不执行这里,name不等于self
		inode = new_inode(dir->i_sb);
		if (!inode)
			return ERR_PTR(-ENOMEM);
		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
		inode->i_ino = fake_ino(0, PROC_PID_INO);
		inode->u.proc_i.file = NULL;
		inode->u.proc_i.task = NULL;
		inode->i_mode = S_IFLNK|S_IRWXUGO;
		inode->i_uid = inode->i_gid = 0;
		inode->i_size = 64;
		inode->i_op = &proc_self_inode_operations;
		d_add(dentry, inode);
		return NULL;//返回了
	}
	while (len-- > 0) {//执行这里
		c = *name - ‘0‘;
		name++;
		if (c > 9)
			goto out;
		if (pid >= MAX_MULBY10)
			goto out;
		pid *= 10;
		pid += c;
		if (!pid)
			goto out;
	}

	read_lock(&tasklist_lock);
	task = find_task_by_pid(pid);
	if (task)
		get_task_struct(task);
	read_unlock(&tasklist_lock);
	if (!task)
		goto out;

	inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_INO);

	free_task_struct(task);

	if (!inode)
		goto out;
	inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
	inode->i_op = &proc_base_inode_operations;//注意这个指针,一会会用到
	inode->i_fop = &proc_base_operations;
	inode->i_nlink = 3;
	inode->i_flags|=S_IMMUTABLE;

	dentry->d_op = &pid_base_dentry_operations;
	d_add(dentry, inode);
	return NULL;
out:
	return ERR_PTR(-ENOENT);
}

static struct inode_operations proc_base_inode_operations = {
	lookup:		proc_base_lookup,
};

    proc_pid_make_inode,为进程创建一个inode结构

static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
{
	struct inode * inode;

	/* We need a new inode */
	
	inode = new_inode(sb);
	if (!inode)
		goto out;

	/* Common stuff */

	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
	inode->i_ino = fake_ino(task->pid, ino);

	inode->u.proc_i.file = NULL;
	/*
	 * grab the reference to task.
	 */
	inode->u.proc_i.task = task;//当前进程的task赋值到这里
	get_task_struct(task);
	if (!task->p_pptr)
		goto out_unlock;

	inode->i_uid = 0;
	inode->i_gid = 0;
	if (ino == PROC_PID_INO || task->dumpable) {
		inode->i_uid = task->euid;
		inode->i_gid = task->egid;
	}

out:
	return inode;

out_unlock:
	iput(inode);
	return NULL;
}

    从path_walk返回后,nd->dentry已指向代表着当前进程的目录节点的dentry结构,之后层层返回到proc_self_follow_link,最后返回到主path_walk的第二次循环中,开始执行第三次循环。


    第三次循环,最后一个节点是"cwd",这一次所搜索的节点已经是路径名中的最后一个节点,所以转到last_component的地方,同样也是在real_lookup()中通过父节点的inode_operations结构中的lookup函数指针执行实际的操作,也就是proc_base_lookup,代码如下:

static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode;
	int error;
	struct task_struct *task = dir->u.proc_i.task;//取出当前进程的task
	struct pid_entry *p;

	error = -ENOENT;
	inode = NULL;

	for (p = base_stuff; p->name; p++) {//在base_stuff中找到cwd,关于base_stuff如下面所示
		if (p->len != dentry->d_name.len)
			continue;
		if (!memcmp(dentry->d_name.name, p->name, p->len))
			break;
	}
	if (!p->name)
		goto out;

	error = -EINVAL;
	inode = proc_pid_make_inode(dir->i_sb, task, p->type);//p->type为5
	if (!inode)
		goto out;

	inode->i_mode = p->mode;
	/*
	 * Yes, it does not scale. And it should not. Don‘t add
	 * new entries into /proc/<pid>/ without very good reasons.
	 */
	switch(p->type) {
		case PROC_PID_FD:
			inode->i_nlink = 2;
			inode->i_op = &proc_fd_inode_operations;
			inode->i_fop = &proc_fd_operations;
			break;
		case PROC_PID_EXE:
			inode->i_op = &proc_pid_link_inode_operations;
			inode->u.proc_i.op.proc_get_link = proc_exe_link;
			break;
		case PROC_PID_CWD:
			inode->i_op = &proc_pid_link_inode_operations;//两者很重要
			inode->u.proc_i.op.proc_get_link = proc_cwd_link;//两者很重要,inode->u.proc_i指向了proc_inode_info,结构如下面所示
			break;
		case PROC_PID_ROOT:
			inode->i_op = &proc_pid_link_inode_operations;
			inode->u.proc_i.op.proc_get_link = proc_root_link;
			break;
		case PROC_PID_ENVIRON:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_environ;
			break;
		case PROC_PID_STATUS:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_status;
			break;
		case PROC_PID_STAT:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_stat;
			break;
		case PROC_PID_CMDLINE:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_cmdline;
			break;
		case PROC_PID_STATM:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_statm;
			break;
		case PROC_PID_MAPS:
			inode->i_fop = &proc_maps_operations;
			break;
#ifdef CONFIG_SMP
		case PROC_PID_CPU:
			inode->i_fop = &proc_info_file_operations;
			inode->u.proc_i.op.proc_read = proc_pid_cpu;
			break;
#endif
		case PROC_PID_MEM:
			inode->i_op = &proc_mem_inode_operations;
			inode->i_fop = &proc_mem_operations;
			break;
		default:
			printk("procfs: impossible type (%d)",p->type);
			iput(inode);
			return ERR_PTR(-EINVAL);
	}
	dentry->d_op = &pid_dentry_operations;
	d_add(dentry, inode);
	return NULL;

out:
	return ERR_PTR(error);
}
struct pid_entry {
	int type;
	int len;
	char *name;
	mode_t mode;
};

enum pid_directory_inos {
	PROC_PID_INO = 2,
	PROC_PID_STATUS,
	PROC_PID_MEM,
	PROC_PID_CWD,
	PROC_PID_ROOT,
	PROC_PID_EXE,
	PROC_PID_FD,
	PROC_PID_ENVIRON,
	PROC_PID_CMDLINE,
	PROC_PID_STAT,
	PROC_PID_STATM,
	PROC_PID_MAPS,
	PROC_PID_CPU,
	PROC_PID_FD_DIR = 0x8000,	/* 0x8000-0xffff */
};

#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
static struct pid_entry base_stuff[] = {
  E(PROC_PID_FD,	"fd",		S_IFDIR|S_IRUSR|S_IXUSR),
  E(PROC_PID_ENVIRON,	"environ",	S_IFREG|S_IRUSR),
  E(PROC_PID_STATUS,	"status",	S_IFREG|S_IRUGO),
  E(PROC_PID_CMDLINE,	"cmdline",	S_IFREG|S_IRUGO),
  E(PROC_PID_STAT,	"stat",		S_IFREG|S_IRUGO),
  E(PROC_PID_STATM,	"statm",	S_IFREG|S_IRUGO),
#ifdef CONFIG_SMP
  E(PROC_PID_CPU,	"cpu",		S_IFREG|S_IRUGO),
#endif
  E(PROC_PID_MAPS,	"maps",		S_IFREG|S_IRUGO),
  E(PROC_PID_MEM,	"mem",		S_IFREG|S_IRUSR|S_IWUSR),
  E(PROC_PID_CWD,	"cwd",		S_IFLNK|S_IRWXUGO),
  E(PROC_PID_ROOT,	"root",		S_IFLNK|S_IRWXUGO),
  E(PROC_PID_EXE,	"exe",		S_IFLNK|S_IRWXUGO),
  {0,0,NULL,0}
};
#undef E
struct proc_inode_info {
	struct task_struct *task;
	int type;
	union {
		int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
		int (*proc_read)(struct task_struct *task, char *page);
	} op;
	struct file *file;
};
    从proc_base_lookup()经由real_lookup()返回到path_walk时,返回值dentry已经指向了这个特定"cwd"节点dentry结构。但是接着同样要受到对其Inode结构中的i_op指针以及相应inode_operations结构的指针follow_link的检验,看path_walk的代码:

inode = dentry->d_inode;  
        if ((lookup_flags & LOOKUP_FOLLOW)//和第一次和第二次循环不同,必须LOOKUP_FOLLOW标志位置1  
            && inode && inode->i_op && inode->i_op->follow_link) {  
            err = do_follow_link(dentry, nd);  
static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	int err;
	if (current->link_count >= 8)
		goto loop;
	current->link_count++;
	UPDATE_ATIME(dentry->d_inode);
	err = dentry->d_inode->i_op->follow_link(dentry, nd);
	current->link_count--;
	return err;
loop:
	path_release(nd);
	return -ELOOP;
}

    dentry->d_inode->i_op指向了proc_pid_link_inode_operations结构,是在这里设置的:

		case PROC_PID_CWD:
			inode->i_op = &proc_pid_link_inode_operations;//两者很重要
			inode->u.proc_i.op.proc_get_link = proc_cwd_link;//两者很重要,inode->u.proc_i指向了proc_inode_info结构

static struct inode_operations proc_pid_link_inode_operations = {
	readlink:	proc_pid_readlink,
	follow_link:	proc_pid_follow_link
};
     dentry->d_inode->i_op->follow_link(dentry, nd),proc_pid_follow_link(dentry, nd),也就是代码如下:

static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)//参数dentry为"cwd"节点dentry结构
{
	struct inode *inode = dentry->d_inode;
	int error = -EACCES;

	/* We don‘t need a base pointer in the /proc filesystem */
	path_release(nd);

	if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
		goto out;
	error = proc_check_root(inode);
	if (error)
		goto out;

	error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);//也就是proc_cwd_link
	nd->last_type = LAST_BIND;
out:
	return error;
}
    inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt),也就是proc_cwd_link(inode, &nd->dentry, &nd->mnt),代码如下:

static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
{
	struct fs_struct *fs;
	int result = -ENOENT;
	task_lock(inode->u.proc_i.task);
	fs = inode->u.proc_i.task->fs;//task指向相应进程的task_struct结构,进而可以得到这个进程的fs_struct结构
	if(fs)
		atomic_inc(&fs->count);
	task_unlock(inode->u.proc_i.task);
	if (fs) {
		read_lock(&fs->lock);
		*mnt = mntget(fs->pwdmnt);//nd->mnt指向了该目录所在设备安装时的vfsmount结构
		*dentry = dget(fs->pwd);//nd->dentry指向了该进程的"当前工作目录"的dentry结构
		read_unlock(&fs->lock);
		result = 0;
		put_fs_struct(fs);
	}
	return result;
}
    当从proc_cwd_link()经由do_follow_link()返回到path_walk()中时,nameidata结构中指针已经指向最终的目标,即当前进程的当前工作目录。

    也就是:

int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
{
	char *tmp;
	int err;

	tmp = getname(name);//在系统空间分配一个页面,并从用户空间把文件名复制到这个页面
	err = PTR_ERR(tmp);
	if (!IS_ERR(tmp)) {
		err = 0;
		if (path_init(tmp, flags, nd))
			err = path_walk(tmp, nd);
		putname(tmp);
	}
	return err;
}
   此时nd->mnt指向了该目录所在设备安装时的vfsmount结构,nd->dentry指向了该进程的"当前工作目录"的dentry结构。

郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。