Linux内核源代码情景分析-特殊文件系统/proc
由于proc文件系统并不物理地存在于任何设备上,它的安装过程是特殊的。对proc文件系统不能直接通过mount()来安装,而要先由系统内核在内核初始化时自动地通过一个函数kern_mount()安装一次,然后再由处理系统初始化的进程通过mount()安装,实际上是"重安装"。
一、在内核初始化时调用init_proc_fs(),代码如下:
/* Declares proc_fs_type: name "proc", read_super method proc_read_super, flag FS_SINGLE. */
static DECLARE_FSTYPE(proc_fs_type, "proc", proc_read_super, FS_SINGLE);

/*
 * Register the "proc" filesystem type and mount it once inside the kernel.
 *
 * Returns 0 on success, the error from register_filesystem(), or the error
 * encoded in the pointer returned by kern_mount() (in which case the type
 * is unregistered again).
 */
static int __init init_proc_fs(void)
{
	int err = register_filesystem(&proc_fs_type);	// register the "proc" filesystem type with the system

	if (!err) {
		proc_mnt = kern_mount(&proc_fs_type);	// mount one concrete proc instance from inside the kernel
		err = PTR_ERR(proc_mnt);
		if (IS_ERR(proc_mnt))
			unregister_filesystem(&proc_fs_type);
		else
			err = 0;
	}
	return err;
}
#define DECLARE_FSTYPE(var,type,read,flags) struct file_system_type var = { name: type, read_super: read, fs_flags: flags, owner: THIS_MODULE, }register_filesystem,向系统登记"proc"这么一种文件系统,代码如下:
/*
 * Add a filesystem type to the file_systems list.
 *
 * Returns 0 on success, -EINVAL if fs is NULL, -EBUSY if fs->next is
 * already set or a type with the same name is already on the list.
 */
int register_filesystem(struct file_system_type * fs)
{
	int res = 0;
	struct file_system_type ** p;

	if (!fs)
		return -EINVAL;
	if (fs->next)
		return -EBUSY;
	write_lock(&file_systems_lock);
	p = find_filesystem(fs->name);
	if (*p)
		res = -EBUSY;
	else
		*p = fs;	// register the "proc" filesystem type with the system
	write_unlock(&file_systems_lock);
	return res;
}
/*
 * Walk the file_systems list looking for an entry whose name equals `name`.
 *
 * Returns the address of the link that points at the matching entry; when
 * nothing matches, returns the address of the terminating NULL link.
 */
static struct file_system_type **find_filesystem(const char *name)
{
	struct file_system_type **p;

	for (p=&file_systems; *p; p=&(*p)->next)
		if (strcmp((*p)->name,name) == 0)
			break;
	return p;
}
struct vfsmount *kern_mount(struct file_system_type *type) { kdev_t dev = get_unnamed_dev();//获得一个设备号 struct super_block *sb; struct vfsmount *mnt; if (!dev) return ERR_PTR(-EMFILE); sb = read_super(dev, NULL, type, 0, NULL, 0);//先分配一个空白的super_block数据结构,然后通过由具体文件系统的file_system_type数据结构中的函数指针read_super调用具体的函数来读入超级块 if (!sb) { put_unnamed_dev(dev); return ERR_PTR(-EINVAL); } mnt = add_vfsmnt(NULL, sb->s_root, NULL); if (!mnt) { kill_super(sb, 0); return ERR_PTR(-ENOMEM); } type->kern_mnt = mnt;//最后把根节点vfsmount赋值给type->kern_mnt return mnt; }read_super,先分配一个空白的super_block数据结构,然后通过由具体文件系统的file_system_type数据结构中的函数指针read_super调用具体的函数来读入超级块。
/*
 * Obtain an empty super_block, initialise its generic fields, and call the
 * filesystem type's read_super method on it while the super_block is locked.
 *
 * Returns the super_block, or NULL when get_empty_super() fails or the
 * type's read_super method returns NULL (in which case s_dev, s_bdev and
 * s_type are cleared again before returning).
 */
static struct super_block * read_super(kdev_t dev, struct block_device *bdev, struct file_system_type *type, int flags, void *data, int silent)
{
	struct super_block * s;

	s = get_empty_super();
	if (!s)
		goto out;
	s->s_dev = dev;
	s->s_bdev = bdev;
	s->s_flags = flags;
	s->s_dirt = 0;
	sema_init(&s->s_vfs_rename_sem,1);
	sema_init(&s->s_nfsd_free_path_sem,1);
	s->s_type = type;
	sema_init(&s->s_dquot.dqio_sem, 1);
	sema_init(&s->s_dquot.dqoff_sem, 1);
	s->s_dquot.flags = 0;
	lock_super(s);
	if (!type->read_super(s, data, silent))
		goto out_fail;
	unlock_super(s);
	/* tell bdcache that we are going to keep this one */
	if (bdev)
		atomic_inc(&bdev->bd_count);
out:
	return s;

out_fail:
	s->s_dev = 0;
	s->s_bdev = 0;
	s->s_type = NULL;
	unlock_super(s);
	return NULL;
}
type->read_super对于proc文件系统来说,这个函数为proc_read_super()。代码如下:
struct super_block *proc_read_super(struct super_block *s,void *data, int silent) { struct inode * root_inode; struct task_struct *p; s->s_blocksize = 1024; s->s_blocksize_bits = 10; s->s_magic = PROC_SUPER_MAGIC; s->s_op = &proc_sops; root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root);//根据根目录项,得到根节点的inode结构 if (!root_inode) goto out_no_root; /* * Fixup the root inode‘s nlink value */ read_lock(&tasklist_lock); for_each_task(p) if (p->pid) root_inode->i_nlink++; read_unlock(&tasklist_lock); s->s_root = d_alloc_root(root_inode);//分配根节点的dentry结构,并把根节点的inode结构和dentry结构相连,并赋值给s->s_root if (!s->s_root) goto out_no_root; parse_options(data, &root_inode->i_uid, &root_inode->i_gid); return s; out_no_root: printk("proc_read_super: get root inode failed\n"); iput(root_inode); return NULL; }读入超级块,实际上是生成超级块,还有super_block结构中的super_operations指针s_op被设置成指向proc_sops,定义如下:
/* super_operations table that proc_read_super() installs in s->s_op. */
static struct super_operations proc_sops = {
	read_inode:	proc_read_inode,
	put_inode:	force_delete,
	delete_inode:	proc_delete_inode,
	statfs:		proc_statfs,
};
不仅如此,proc文件系统中的目录项结构,即dentry结构,在设备上也没有对应物,而以内存中的proc_dir_entry数据结构来代替,定义如下:
/*
 * In-memory directory entry of the proc filesystem.  Entries form a tree
 * through next/parent/subdir (see proc_register()).
 */
struct proc_dir_entry {
	unsigned short low_ino;		/* inode number, assigned in proc_register() */
	unsigned short namelen;		/* length of name */
	const char *name;
	mode_t mode;
	nlink_t nlink;
	uid_t uid;
	gid_t gid;
	unsigned long size;
	struct inode_operations * proc_iops;	/* copied to inode->i_op by proc_get_inode() */
	struct file_operations * proc_fops;	/* copied to inode->i_fop by proc_get_inode() */
	get_info_t *get_info;
	struct module *owner;
	struct proc_dir_entry *next, *parent, *subdir;
	void *data;			/* passed to read_proc by proc_file_read() */
	read_proc_t *read_proc;
	write_proc_t *write_proc;
	atomic_t count;		/* use count */
	int deleted;		/* delete flag */
	kdev_t rdev;
};
最重要的就是/proc节点的proc_dir_entry结构(目录项)proc_root,定义如下:
/*
 * proc_dir_entry of the root of the proc filesystem.  It is a directory
 * and is its own parent.
 */
struct proc_dir_entry proc_root = {
	low_ino:	PROC_ROOT_INO,
	namelen:	5,
	name:		"/proc",
	mode:		S_IFDIR | S_IRUGO | S_IXUGO,
	nlink:		2,
	proc_iops:	&proc_root_inode_operations,
	proc_fops:	&proc_root_operations,
	parent:		&proc_root,
};
struct inode * proc_get_inode(struct super_block * sb, int ino, struct proc_dir_entry * de) { struct inode * inode; /* * Increment the use count so the dir entry can‘t disappear. */ de_get(de); #if 1 /* shouldn‘t ever happen */ if (de && de->deleted) printk("proc_iget: using deleted entry %s, count=%d\n", de->name, atomic_read(&de->count)); #endif inode = iget(sb, ino); if (!inode) goto out_fail; inode->u.generic_ip = (void *) de;//根目录项结构放到了这里 if (de) {//根据根目录项结构,填充根节点的inode结构 if (de->mode) { inode->i_mode = de->mode; inode->i_uid = de->uid; inode->i_gid = de->gid; } if (de->size) inode->i_size = de->size; if (de->nlink) inode->i_nlink = de->nlink; if (de->owner) __MOD_INC_USE_COUNT(de->owner); if (S_ISBLK(de->mode)||S_ISCHR(de->mode)||S_ISFIFO(de->mode)) init_special_inode(inode,de->mode,kdev_t_to_nr(de->rdev)); else { if (de->proc_iops) inode->i_op = de->proc_iops;//proc_root_inode_operations if (de->proc_fops) inode->i_fop = de->proc_fops;//proc_root_operations } } out: return inode; out_fail: de_put(de); goto out; }返回到proc_read_super,开始执行d_alloc_root,分配根节点的dentry结构,并把根节点的inode结构和dentry结构相连。
/*
 * Allocate a root dentry named "/" for root_inode.
 *
 * Returns the dentry, or NULL when root_inode is NULL or d_alloc() fails.
 */
struct dentry * d_alloc_root(struct inode * root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		res = d_alloc(NULL, &(const struct qstr) { "/", 1, 0 });
		if (res) {
			res->d_sb = root_inode->i_sb;
			res->d_parent = res;	// this is the root dentry, so it has no upper level
			d_instantiate(res, root_inode);	// attach the root inode to the root dentry
		}
	}
	return res;
}
static struct vfsmount *add_vfsmnt(struct nameidata *nd, struct dentry *root, const char *dev_name) { struct vfsmount *mnt; struct super_block *sb = root->d_inode->i_sb; char *name; mnt = kmalloc(sizeof(struct vfsmount), GFP_KERNEL); if (!mnt) goto out; memset(mnt, 0, sizeof(struct vfsmount)); if (nd || dev_name) mnt->mnt_flags = MNT_VISIBLE; /* It may be NULL, but who cares? */ if (dev_name) { name = kmalloc(strlen(dev_name)+1, GFP_KERNEL); if (name) { strcpy(name, dev_name); mnt->mnt_devname = name; } } mnt->mnt_owner = current->uid; atomic_set(&mnt->mnt_count,1); mnt->mnt_sb = sb;//重点 spin_lock(&dcache_lock); if (nd && !IS_ROOT(nd->dentry) && d_unhashed(nd->dentry)) goto fail; mnt->mnt_root = dget(root);//重点 mnt->mnt_mountpoint = nd ? dget(nd->dentry) : dget(root);//本身就是挂载节点dentry结构 mnt->mnt_parent = nd ? mntget(nd->mnt) : mnt;//本身就是挂载节点vfsmount结构 if (nd) { list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts); list_add(&mnt->mnt_clash, &nd->dentry->d_vfsmnt); } else { INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_clash); } INIT_LIST_HEAD(&mnt->mnt_mounts); list_add(&mnt->mnt_instances, &sb->s_mounts); list_add(&mnt->mnt_list, vfsmntlist.prev); spin_unlock(&dcache_lock); out: return mnt; fail: spin_unlock(&dcache_lock); if (mnt->mnt_devname) kfree(mnt->mnt_devname); kfree(mnt); return NULL; }
二、光是kern_mount()还不够,还得由系统的初始化进程从内核外部通过系统调用mount()再安装一次。通常,这个命令行是:mount -nvt proc /dev/null /proc
前面我们提到过,proc文件系统的file_system_type数据结构中的FS_SINGLE标志位为1,它起着重要的作用。为什么重要呢?因为它使sys_mount()的主体do_mount()通过get_sb_single(),而不是get_sb_bdev(),来取得所安装文件系统的super_block数据结构。相关代码如下:
/* Excerpt: choose how to obtain the super_block from the filesystem type's flags. */
if (fstype->fs_flags & FS_NOMOUNT)
	sb = ERR_PTR(-EINVAL);
else if (fstype->fs_flags & FS_REQUIRES_DEV)
	sb = get_sb_bdev(fstype, dev_name, flags, data_page);
else if (fstype->fs_flags & FS_SINGLE)
	sb = get_sb_single(fstype, flags, data_page);
else
	sb = get_sb_nodev(fstype, flags, data_page);
/*
 * Return the super_block of the kernel-internal mount recorded in
 * fs_type->kern_mnt, after remounting it with the given flags and data.
 *
 * mount_sem is taken here and is not released inside this function.
 */
static struct super_block *get_sb_single(struct file_system_type *fs_type, int flags, void *data)
{
	struct super_block * sb;
	/*
	 * Get the superblock of kernel-wide instance, but
	 * keep the reference to fs_type.
	 */
	down(&mount_sem);
	sb = fs_type->kern_mnt->mnt_sb;
	if (!sb)
		BUG();
	get_filesystem(fs_type);
	do_remount_sb(sb, flags, data);
	return sb;
}
取得了proc文件系统的super_block结构以后,回到do_mount()代码中,以后的操作就与普通文件系统的安装无异了。这样就将proc文件系统安装到了节点/proc上。
三、刚才我们看到了/proc节点的proc_dir_entry结构proc_root,现在我们创建/proc节点以下的子节点的proc_dir_entry结构,这是由内核在初始化时调用proc_root_init()完成的,代码如下:
/*
 * Create the proc_dir_entry structures below the proc root: the entries
 * made by proc_misc_init() and proc_tty_init(), plus the directories
 * net, sysvipc, sys, fs, driver, openprom and bus (some only when the
 * corresponding CONFIG_ option is defined).
 */
void __init proc_root_init(void)
{
	proc_misc_init();
	proc_net = proc_mkdir("net", 0);
#ifdef CONFIG_SYSVIPC
	proc_mkdir("sysvipc", 0);
#endif
#ifdef CONFIG_SYSCTL
	proc_sys_root = proc_mkdir("sys", 0);
#endif
	proc_root_fs = proc_mkdir("fs", 0);
	proc_root_driver = proc_mkdir("driver", 0);
#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
	/* just give it a mountpoint */
	proc_mkdir("openprom", 0);
#endif
	proc_tty_init();
#ifdef CONFIG_PROC_DEVICETREE
	proc_device_tree_init();
#endif
	proc_bus = proc_mkdir("bus", 0);
}
proc_misc_init,主要创建/proc节点以下的子节点的proc_dir_entry结构,而且子节点大多是文件,不是目录。
/*
 * Create the miscellaneous entries directly below the proc root.
 *
 * The entries in simple_ones[] only need a read_proc callback and are
 * created in a loop; kmsg, kcore, profile, ppc_htab and slabinfo need
 * extra setup (their own file_operations, a size, or a write_proc).
 */
void __init proc_misc_init(void)
{
	struct proc_dir_entry *entry;
	static struct {
		char *name;
		int (*read_proc)(char*,char**,off_t,int,int*,void*);
	} *p, simple_ones[] = {
		{"loadavg", loadavg_read_proc},
		{"uptime", uptime_read_proc},
		{"meminfo", meminfo_read_proc},
		{"version", version_read_proc},
		{"cpuinfo", cpuinfo_read_proc},
#ifdef CONFIG_PROC_HARDWARE
		{"hardware", hardware_read_proc},
#endif
#ifdef CONFIG_STRAM_PROC
		{"stram", stram_read_proc},
#endif
#ifdef CONFIG_DEBUG_MALLOC
		{"malloc", malloc_read_proc},
#endif
#ifdef CONFIG_MODULES
		{"modules", modules_read_proc},
		{"ksyms", ksyms_read_proc},
#endif
		{"stat", kstat_read_proc},
		{"devices", devices_read_proc},
		{"partitions", partitions_read_proc},
#if !defined(CONFIG_ARCH_S390)
		{"interrupts", interrupts_read_proc},
#endif
		{"filesystems", filesystems_read_proc},
		{"dma", dma_read_proc},
		{"ioports", ioports_read_proc},
		{"cmdline", cmdline_read_proc},
#ifdef CONFIG_SGI_DS1286
		{"rtc", ds1286_read_proc},
#endif
		{"locks", locks_read_proc},
		{"mounts", mounts_read_proc},
		{"swaps", swaps_read_proc},
		{"iomem", memory_read_proc},
		{"execdomains", execdomains_read_proc},
		{NULL,}
	};
	/* the {NULL,} entry terminates the table */
	for (p = simple_ones; p->name; p++)
		create_proc_read_entry(p->name, 0, NULL, p->read_proc, NULL);

	/* And now for trickier ones */
	entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
	if (entry)
		entry->proc_fops = &proc_kmsg_operations;
	proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL);
	if (proc_root_kcore) {
		proc_root_kcore->proc_fops = &proc_kcore_operations;
		proc_root_kcore->size =
				(size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
	}
	if (prof_shift) {
		entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL);
		if (entry) {
			entry->proc_fops = &proc_profile_operations;
			entry->size = (1+prof_len) * sizeof(unsigned int);
		}
	}
#ifdef __powerpc__
	{
		extern struct file_operations ppc_htab_operations;
		entry = create_proc_entry("ppc_htab", S_IRUGO|S_IWUSR, NULL);
		if (entry)
			entry->proc_fops = &ppc_htab_operations;
	}
#endif
	entry = create_proc_read_entry("slabinfo", S_IWUSR | S_IRUGO, NULL,
				       slabinfo_read_proc, NULL);
	if (entry)
		entry->write_proc = slabinfo_write_proc;
}
create_proc_read_entry,先调用create_proc_entry()创建/proc节点以下的子节点的proc_dir_entry结构,再设置该结构的read_proc和data,代码如下:
extern inline struct proc_dir_entry *create_proc_read_entry(const char *name,//我们拿第一个举例,name为loadavg,mode为0,base为NULL,read_proc为loadavg_read_proc,data为NULL mode_t mode, struct proc_dir_entry *base, read_proc_t *read_proc, void * data) { struct proc_dir_entry *res=create_proc_entry(name,mode,base); if (res) { res->read_proc=read_proc; res->data=data; } return res; }
struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent) { struct proc_dir_entry *ent = NULL; const char *fn = name; int len; if (!parent && xlate_proc_name(name, &parent, &fn) != 0) goto out; len = strlen(fn); ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);//创建proc_dir_entry结构 if (!ent) goto out; memset(ent, 0, sizeof(struct proc_dir_entry)); memcpy(((char *) ent) + sizeof(*ent), fn, len + 1);//前面是proc_dir_entry结构 ent->name = ((char *) ent) + sizeof(*ent);//后面是名字和长度 ent->namelen = len; if (S_ISDIR(mode)) { if ((mode & S_IALLUGO) == 0) mode |= S_IRUGO | S_IXUGO; ent->proc_fops = &proc_dir_operations; ent->proc_iops = &proc_dir_inode_operations; ent->nlink = 2; } else { if ((mode & S_IFMT) == 0) mode |= S_IFREG; if ((mode & S_IALLUGO) == 0) mode |= S_IRUGO; ent->nlink = 1; } ent->mode = mode; proc_register(parent, ent);//把loadavg节点的proc_dir_entry结构登记到根节点的proc_dir_entry结构 out: return ent; }xlate_proc_name,parent返回的是父节点的proc_dir_entry结构,fn返回当前的节点名,现在name为loadavg,返回的fn还是loadavg,parent是根节点的proc_dir_entry结构proc_root。
static int xlate_proc_name(const char *name, struct proc_dir_entry **ret, const char **residual) { const char *cp = name, *next; struct proc_dir_entry *de; int len; de = &proc_root; while (1) { next = strchr(cp, ‘/‘);//此时next为空 if (!next) break; len = next - cp; for (de = de->subdir; de ; de = de->next) { if (proc_match(len, cp, de)) break; } if (!de) return -ENOENT; cp += len + 1; } *residual = cp;//指向loadavg *ret = de;//指向根节点的proc_dir_entry结构 return 0; }proc_register(parent, ent),把loadavg节点的proc_dir_entry结构登记到根节点的proc_dir_entry结构。
static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) { int i; i = make_inode_number(); if (i < 0) return -EAGAIN; dp->low_ino = i; dp->next = dir->subdir; dp->parent = dir;//子节点的proc_dir_dentry通过subdir指向父节点的proc_dir_dentry dir->subdir = dp;//父节点的proc_dir_dentry通过subdir指向子节点的proc_dir_dentry if (S_ISDIR(dp->mode)) { if (dp->proc_iops == NULL) { dp->proc_fops = &proc_dir_operations; dp->proc_iops = &proc_dir_inode_operations; } dir->nlink++; } else if (S_ISLNK(dp->mode)) { if (dp->proc_iops == NULL) dp->proc_iops = &proc_link_inode_operations; } else if (S_ISREG(dp->mode)) {//loadvag是普通文件 if (dp->proc_fops == NULL) dp->proc_fops = &proc_file_operations; } return 0; }proc_misc_init中的其他类似的代码就不解释了,例如:
/* Excerpt from proc_misc_init(): the entries created outside the simple_ones[] loop. */
entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL);
entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL);
entry = create_proc_read_entry("slabinfo", S_IWUSR | S_IRUGO, NULL, slabinfo_read_proc, NULL);
返回到proc_root_init,执行proc_mkdir("net", 0),代码如下:
struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) { struct proc_dir_entry *ent = NULL; const char *fn = name; int len; if (!parent && xlate_proc_name(name, &parent, &fn) != 0) goto out; len = strlen(fn); ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); if (!ent) goto out; memset(ent, 0, sizeof(struct proc_dir_entry)); memcpy(((char *) ent) + sizeof(*ent), fn, len + 1); ent->name = ((char *) ent) + sizeof(*ent); ent->namelen = len; ent->proc_fops = &proc_dir_operations;//主要区别 ent->proc_iops = &proc_dir_inode_operations; ent->nlink = 2; ent->mode = S_IFDIR | S_IRUGO | S_IXUGO; proc_register(parent, ent); out: return ent; }和上面的操作区别在于:
/* Excerpt from proc_mkdir(): a directory entry gets the directory operations. */
ent->proc_fops = &proc_dir_operations;
ent->proc_iops = &proc_dir_inode_operations;
proc_root_init还有其他类似的操作,就不解释了:
/* Excerpt from proc_root_init(): the remaining calls (preprocessor conditionals omitted). */
proc_mkdir("sysvipc", 0);
proc_sys_root = proc_mkdir("sys", 0);
proc_root_fs = proc_mkdir("fs", 0);
proc_root_driver = proc_mkdir("driver", 0);
proc_mkdir("openprom", 0);
proc_tty_init();
proc_bus = proc_mkdir("bus", 0);
我们主要关心proc_tty_init,代码如下:
/*
 * Create the tty directory with its ldisc and driver subdirectories and
 * the read-only entries tty/ldiscs and tty/drivers.  Nothing else is
 * created when the tty directory itself cannot be made.
 */
void __init proc_tty_init(void)
{
	if (!proc_mkdir("tty", 0))
		return;
	proc_tty_ldisc = proc_mkdir("tty/ldisc", 0);
	proc_tty_driver = proc_mkdir("tty/driver", 0);

	create_proc_read_entry("tty/ldiscs", 0, 0, tty_ldiscs_read_proc,NULL);
	create_proc_read_entry("tty/drivers", 0, 0, tty_drivers_read_proc,NULL);
}
proc_mkdir("tty", 0)和上面的步骤一样,proc_mkdir("tty/ldisc", 0)的执行,比较不同,如下:
struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) { struct proc_dir_entry *ent = NULL; const char *fn = name; int len; if (!parent && xlate_proc_name(name, &parent, &fn) != 0)//name指向tty/ldisc,返回parent为tty节点的proc_dir_dentry结构,fn指向ldisc字符串 goto out; len = strlen(fn); ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); if (!ent) goto out; memset(ent, 0, sizeof(struct proc_dir_entry)); memcpy(((char *) ent) + sizeof(*ent), fn, len + 1); ent->name = ((char *) ent) + sizeof(*ent); ent->namelen = len; ent->proc_fops = &proc_dir_operations; ent->proc_iops = &proc_dir_inode_operations; ent->nlink = 2; ent->mode = S_IFDIR | S_IRUGO | S_IXUGO; proc_register(parent, ent);//将ldisc这个节点的proc_dir_entry结构登记到tty这个节点的proc_dir_entry结构 out: return ent; }
static int xlate_proc_name(const char *name, struct proc_dir_entry **ret, const char **residual)//name指向tty/ldisc { const char *cp = name, *next; struct proc_dir_entry *de; int len; de = &proc_root; while (1) { next = strchr(cp, ‘/‘);//next指向ldisc if (!next) break; len = next - cp;//tty的长度,cp还指向tty for (de = de->subdir; de ; de = de->next) { if (proc_match(len, cp, de))//在根节点的proc_dir_entry结构的subdir寻找子节点的proc_dir_entry,直到匹配tty这个节点 break;//跳出for循环 } if (!de) return -ENOENT; cp += len + 1;//cp指向了ldisc } *residual = cp;//指向了ldisc *ret = de;//tty这个节点的proc_dir_entry结构 return 0; }
四、这个场景是对/proc/loadavg的访问,这个文件提供有关系统在过去1分钟、5分钟和15分钟内的平均负荷的统计信息。这个文件只支持读操作,其proc_dir_entry结构是在proc_misc_init()中通过create_proc_read_entry()创建的。
首先调用open("/proc/loadavg"),具体过程请参考Linux内核源代码情景分析-文件的打开,open_namei里面这部分会有些不同:
if (path_init(pathname, LOOKUP_PARENT, nd)) error = path_walk(pathname, nd);//找到父节点
这里找到的是"/proc/loadavg"的父节点,也就是/proc节点。参考Linux内核源代码情景分析-文件系统安装后的访问:path_walk()会检查该节点是否是挂载点,即执行while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)),通过这个循环找到proc节点的dentry结构。
然后再调用dentry = lookup_hash(&nd->last, nd->dentry),nd->last就是下一个节点名"loadavg"。这个函数先通过cached_lookup()看看下一个节点的dentry结构是否已经建立在内存中,如果没有就要通过real_lookup()从设备上读入该节点的目录项(以及索引节点)并在内存中为之创建起它的dentry结构。
/*
 * Look up `name` in the directory `parent` with the directory semaphore
 * held, calling the directory inode's lookup method when the dentry is
 * not found by d_lookup().
 *
 * Returns the dentry, whatever non-NULL value the lookup method returned,
 * ERR_PTR(-ENOMEM) when d_alloc() fails, or ERR_PTR(-ENOENT) when a
 * dentry found by d_lookup() fails revalidation and is invalidated.
 */
static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
{
	struct dentry * result;
	struct inode *dir = parent->d_inode;

	down(&dir->i_sem);
	/*
	 * First re-do the cached lookup just in case it was created
	 * while we waited for the directory semaphore..
	 *
	 * FIXME! This could use version numbering or similar to
	 * avoid unnecessary cache lookups.
	 */
	result = d_lookup(parent, name);
	if (!result) {
		struct dentry * dentry = d_alloc(parent, name);
		result = ERR_PTR(-ENOMEM);
		if (dentry) {
			lock_kernel();
			result = dir->i_op->lookup(dir, dentry);
			unlock_kernel();
			/* a NULL result means the new dentry is the answer */
			if (result)
				dput(dentry);
			else
				result = dentry;
		}
		up(&dir->i_sem);
		return result;
	}

	/*
	 * Uhhuh! Nasty case: the cache was re-populated while
	 * we waited on the semaphore. Need to revalidate.
	 */
	up(&dir->i_sem);
	if (result->d_op && result->d_op->d_revalidate) {
		if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
			dput(result);
			result = ERR_PTR(-ENOENT);
		}
	}
	return result;
}
对于/proc根节点的inode结构中的i_op指针指向proc_root_inode_operations,这是在proc_get_inode中设置的,如下:
/* Excerpt from proc_get_inode(): the entry's operations are copied into the inode. */
if (de->proc_iops)
	inode->i_op = de->proc_iops;	// proc_root_inode_operations
if (de->proc_fops)
	inode->i_fop = de->proc_fops;	// proc_root_operations
/* inode_operations of the proc root directory; only lookup is provided. */
static struct inode_operations proc_root_inode_operations = {
	lookup:		proc_root_lookup,
};
dir->i_op->lookup执行的代码是proc_root_lookup,代码如下:
/*
 * lookup method of the proc root directory.
 *
 * Refreshes the root inode's link count to proc_root.nlink + nr_threads,
 * then tries proc_lookup(); a NULL return from it means the name was
 * found.  Otherwise the result of proc_pid_lookup() is returned.
 */
static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry)
{
	if (dir->i_ino == PROC_ROOT_INO) { /* check for safety... */
		int nlink = proc_root.nlink;

		nlink += nr_threads;

		dir->i_nlink = nlink;
	}

	if (!proc_lookup(dir, dentry))
		return NULL;

	return proc_pid_lookup(dir, dentry);
}
struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry) { struct inode *inode; struct proc_dir_entry * de; int error; error = -ENOENT; inode = NULL; de = (struct proc_dir_entry *) dir->u.generic_ip; if (de) { for (de = de->subdir; de ; de = de->next) { if (!de || !de->low_ino) continue; if (de->namelen != dentry->d_name.len) continue; if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {//找到loadavg节点的proc_dir_entry结构 int ino = de->low_ino; error = -EINVAL; inode = proc_get_inode(dir->i_sb, ino, de);//根据loadavg节点的proc_dir_entry结构得到loadavg节点的inode结构 break; } } } if (inode) { dentry->d_op = &proc_dentry_operations; d_add(dentry, inode); return NULL; } return ERR_PTR(error); }
struct inode * proc_get_inode(struct super_block * sb, int ino, struct proc_dir_entry * de) { struct inode * inode; /* * Increment the use count so the dir entry can‘t disappear. */ de_get(de); #if 1 /* shouldn‘t ever happen */ if (de && de->deleted) printk("proc_iget: using deleted entry %s, count=%d\n", de->name, atomic_read(&de->count)); #endif inode = iget(sb, ino); if (!inode) goto out_fail; inode->u.generic_ip = (void *) de; if (de) { if (de->mode) { inode->i_mode = de->mode; inode->i_uid = de->uid; inode->i_gid = de->gid; } if (de->size) inode->i_size = de->size; if (de->nlink) inode->i_nlink = de->nlink; if (de->owner) __MOD_INC_USE_COUNT(de->owner); if (S_ISBLK(de->mode)||S_ISCHR(de->mode)||S_ISFIFO(de->mode)) init_special_inode(inode,de->mode,kdev_t_to_nr(de->rdev)); else { if (de->proc_iops)//loadavg节点proc_dir_entry结构这个指针为NULL inode->i_op = de->proc_iops; if (de->proc_fops) inode->i_fop = de->proc_fops;//dp->proc_fops = &proc_file_operations,这是在create_proc_entry设置的 } } out: return inode; out_fail: de_put(de); goto out; }open("/proc/loadavg"),执行完open_namei,继续执行dentry_open。
struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) { struct file * f; struct inode *inode; int error; error = -ENFILE; f = get_empty_filp();//分配一个空闲的file数据结构 if (!f) goto cleanup_dentry; f->f_flags = flags; f->f_mode = (flags+1) & O_ACCMODE; inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { error = get_write_access(inode); if (error) goto cleanup_file; } f->f_dentry = dentry;//该节点的dentry结构 f->f_vfsmnt = mnt;//该节点的vfsmount结构 f->f_pos = 0; f->f_reada = 0; f->f_op = fops_get(inode->i_fop);//f->f_op被赋值为inode_i_fop,这里为proc_file_operations if (inode->i_sb) file_move(f, &inode->i_sb->s_files);//将其从中间队列脱链而挂入该文件所在设备的super_block结构中的file结构队列s_files if (f->f_op && f->f_op->open) { error = f->f_op->open(inode,f); if (error) goto cleanup_all; } f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); return f; cleanup_all: fops_put(f->f_op); if (f->f_mode & FMODE_WRITE) put_write_access(inode); f->f_dentry = NULL; f->f_vfsmnt = NULL; cleanup_file: put_filp(f); cleanup_dentry: dput(dentry); mntput(mnt); return ERR_PTR(error); }
asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count) { ssize_t ret; struct file * file; ret = -EBADF; file = fget(fd); if (file) { if (file->f_mode & FMODE_READ) { ret = locks_verify_area(FLOCK_VERIFY_READ, file->f_dentry->d_inode, file, file->f_pos, count); if (!ret) { ssize_t (*read)(struct file *, char *, size_t, loff_t *); ret = -EINVAL; if (file->f_op && (read = file->f_op->read) != NULL) ret = read(file, buf, count, &file->f_pos);//proc_file_read } } if (ret > 0) inode_dir_notify(file->f_dentry->d_parent->d_inode, DN_ACCESS); fput(file); } return ret; }对于,proc文件系统来说,file->fop指向了proc_file_operations结构(见dentry_open里面的说明),代码如下:
/* file_operations that proc_register() gives to regular proc files lacking their own. */
static struct file_operations proc_file_operations = {
	llseek:		proc_file_lseek,
	read:		proc_file_read,
	write:		proc_file_write,
};
/*
 * read method for regular proc files.
 *
 * Repeatedly asks the entry's get_info or read_proc callback to produce
 * data in a temporary page and copies it to the user buffer, until nbytes
 * are delivered, the callback signals end of file, or an error occurs.
 *
 * Returns the number of bytes copied, -ENOMEM when no page is available,
 * a negative value from the callback, or -EFAULT when nothing could be
 * copied to user space.
 */
static ssize_t proc_file_read(struct file * file, char * buf, size_t nbytes, loff_t *ppos)
{
	struct inode * inode = file->f_dentry->d_inode;
	char *page;
	ssize_t retval=0;
	int eof=0;
	ssize_t n, count;
	char *start;
	struct proc_dir_entry * dp;

	dp = (struct proc_dir_entry *) inode->u.generic_ip;	// fetch the loadavg proc_dir_entry
	if (!(page = (char*) __get_free_page(GFP_KERNEL)))
		return -ENOMEM;

	while ((nbytes > 0) && !eof) {
		count = MIN(PROC_BLOCK_SIZE, nbytes);

		start = NULL;
		if (dp->get_info) {
			/*
			 * Handle backwards compatibility with the old net
			 * routines.
			 */
			n = dp->get_info(page, &start, *ppos, count);
			if (n < count)
				eof = 1;
		} else if (dp->read_proc) {
			// loadavg_read_proc: the data is written into page
			n = dp->read_proc(page, &start, *ppos,
					  count, &eof, dp->data);
		} else
			break;

		if (!start) {
			/*
			 * For proc files that are less than 4k
			 */
			start = page + *ppos;
			n -= *ppos;
			if (n <= 0)
				break;
			if (n > count)
				n = count;
		}
		if (n == 0)
			break;	/* End of file */
		if (n < 0) {
			if (retval == 0)
				retval = n;
			break;
		}

		/* This is a hack to allow mangling of file pos independent
		 * of actual bytes read.  Simply place the data at page,
		 * return the bytes, and set `start' to the desired offset
		 * as an unsigned int. - [email protected]
		 */
		n -= copy_to_user(buf, start < page ? page : start, n);	// hand the data back to the user
		if (n == 0) {
			if (retval == 0)
				retval = -EFAULT;
			break;
		}

		*ppos += start < page ? (long)start : n; /* Move down the file */
		nbytes -= n;
		buf += n;
		retval += n;
	}
	free_page((unsigned long) page);
	return retval;
}
在前面代码中,设置了dp->read_proc,如下:
extern inline struct proc_dir_entry *create_proc_read_entry(const char *name,//我们拿第一个举例,name为loadavg,mode为0,base为NULL,read_proc为loadavg_read_proc,data为NULL mode_t mode, struct proc_dir_entry *base, read_proc_t *read_proc, void * data) { struct proc_dir_entry *res=create_proc_entry(name,mode,base); if (res) { res->read_proc=read_proc; res->data=data; } return res; }所以dp->read_proc,执行代码如下:
/*
 * read_proc callback of the loadavg entry.
 *
 * Formats the three avenrun[] values together with nr_running, nr_threads
 * and last_pid into `page`, then lets proc_calc_metrics() work out *start,
 * *eof and the byte count for the requested window.
 *
 * Returns the value of proc_calc_metrics().
 */
static int loadavg_read_proc(char *page, char **start, off_t off,
				 int count, int *eof, void *data)
{
	int a, b, c;
	int len;

	a = avenrun[0] + (FIXED_1/200);
	b = avenrun[1] + (FIXED_1/200);
	c = avenrun[2] + (FIXED_1/200);
	// Print the average CPU load over the past 1, 5 and 15 minutes into
	// the buffer page, followed by the number of currently runnable
	// processes (nr_running), the total number of processes
	// (nr_threads) and the highest pid handed out so far (last_pid).
	len = sprintf(page,"%d.%02d %d.%02d %d.%02d %d/%d %d\n",
		LOAD_INT(a), LOAD_FRAC(a),
		LOAD_INT(b), LOAD_FRAC(b),
		LOAD_INT(c), LOAD_FRAC(c),
		nr_running, nr_threads, last_pid);
	return proc_calc_metrics(page, start, off, count, eof, len);
}
/*
 * Work out which part of a `len`-byte result in `page` answers a read of
 * `count` bytes at offset `off`.
 *
 * Sets *eof when the data ends within the requested window, points *start
 * at page + off, and returns len - off clamped to the range [0, count].
 */
static int proc_calc_metrics(char *page, char **start, off_t off,
				 int count, int *eof, int len)
{
	if (len <= off+count) *eof = 1;
	*start = page + off;
	len -= off;
	if (len>count) len = count;
	if (len<0) len = 0;
	return len;
}
它的作用就是将数组avenrun[]中积累的在过去1分钟,5分钟以及15分钟内的系统平均CPU负荷等统计信息sprintf()”打印“到缓冲区页面中。这些平均负荷的数值是每隔5秒钟在时钟中断服务程序中进行计算的,统计信息中还包括系统当前处于可运行状态的进程个数nr_running以及系统中进程的总数nr_threads,还有系统中已分配使用的最大进程号last_pid。
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。