open()系统调用的服务例程为sys_open()函数,该函数接收的参数为:要打开的文件的路径名filename、访问模式的一些标志flags,以及如果该文件被创建所需要的许可位掩码mode。如果该系统调用成功,就返回一个文件描述符,也就是指向文件对象的指针数组current-> files-> fd_array或者current-> files-> fdtable.fd中新分配给文件的索引;否则,服务例程返回一个负的错误码(用户态的C库封装函数据此返回-1,并把错误码存入errno)。 open()系统调用的所有标志 --------------------------------------------------------------------- include/asm-generic/fcntl.h #define O_ACCMODE 00000003 #define O_RDONLY 00000000 /* 为只读而打开 */ #define O_WRONLY 00000001 /* 为只写而打开 */ #define O_RDWR 00000002 /* 为读和写而打开 */ #ifndef O_CREAT /*如果文件不存在则创建它 */ #define O_CREAT 00000100 /* not fcntl */ #endif #ifndef O_EXCL /* 对于O_CREAT标志,如果文件已经存在,则失败 */ #define O_EXCL 00000200 /* not fcntl */ #endif #ifndef O_NOCTTY /* 从不把文件看作终端 */ #define O_NOCTTY 00000400 /* not fcntl */ #endif #ifndef O_TRUNC /* 截断文件(删除所有的现有内容) */ #define O_TRUNC 00001000 /* not fcntl */ #endif #ifndef O_APPEND /* 总是在文件末尾写 */ #define O_APPEND 00002000 #endif #ifndef O_NONBLOCK /* 非阻塞打开 */ #define O_NONBLOCK 00004000 #endif #ifndef O_DSYNC /* 同步写(阻塞,直到物理写终止) */ #define O_DSYNC 00010000 /* used to be O_SYNC, see below */ #endif #ifndef FASYNC /* 通过信号发出I/O事件通知 */ #define FASYNC 00020000 /* fcntl, for BSD compatibility */ #endif #ifndef O_DIRECT #define O_DIRECT 00040000 /* direct disk access hint */ #endif /* 大型文件(文件长度大于off_t所能表示的范围但小于off64_t)*/ #ifndef O_LARGEFILE #define O_LARGEFILE 00100000 #endif #ifndef O_DIRECTORY /* 如果文件不是一个目录,则失败 */ #define O_DIRECTORY 00200000 /* must be a directory */ #endif #ifndef O_NOFOLLOW /* 不解析路径名尾部的符号链接 */ #define O_NOFOLLOW 00400000 /* don't follow links */ #endif #ifndef O_NOATIME /*不更新索引节点的访问时间。*/ #define O_NOATIME 01000000 #endif #ifndef O_CLOEXEC #define O_CLOEXEC 02000000 /* set close_on_exec */ #endif --------------------------------------------------------------------- 有一些标志的定义是因体系结构而异的。 sys_open()定义如下: --------------------------------------------------------------------- fs/open.c SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode) { long ret; if (force_o_largefile()) flags |= O_LARGEFILE; ret = 
do_sys_open(AT_FDCWD, filename, flags, mode); /* avoid REGPARM breakage on x86: */ asmlinkage_protect(3, ret, filename, flags, mode); return ret; } --------------------------------------------------------------------- 这个函数的操作如下: 首先,调用force_o_largefile()来判断是否支持大文件,若是,则设置标志的O_LARGEFILE位。force_o_largefile()其实是一个宏。这个宏也是因体系结构而异的。 其次,调用do_sys_open(AT_FDCWD, filename, flags, mode)来完成实际的打开文件的任务。下面有更详细说明。 最后,调用asmlinkage_protect()以使系统调用正确返回。它也是一个宏,为了防止编译器错误而设。其他平台为空,只有x86平台有定义,为: --------------------------------------------------------------------- arch/x86/include/asm/linkage.h /* * Make sure the compiler doesn't do anything stupid with the * arguments on the stack - they are owned by the *caller*, not * the callee. This just fools gcc into not spilling into them, * and keeps it from doing tailcall recursion and/or using the * stack slots for temporaries, since they are live and "used" * all the way to the end of the function. * * NOTE! On x86-64, all the arguments are in registers, so this * only matters on a 32-bit kernel. */#define asmlinkage_protect(n, ret, args...) \ __asmlinkage_protect##n(ret, ##args) #define __asmlinkage_protect_n(ret, args...) 
\ __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args) #define __asmlinkage_protect0(ret) \ __asmlinkage_protect_n(ret) #define __asmlinkage_protect1(ret, arg1) \ __asmlinkage_protect_n(ret, "g" (arg1)) #define __asmlinkage_protect2(ret, arg1, arg2) \ __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2)) #define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3)) --------------------------------------------------------------------- do_sys_open()函数定义如下: --------------------------------------------------------------------- fs/open.c long do_sys_open(int dfd, const char __user *filename, int flags, int mode) { char *tmp = getname(filename); int fd = PTR_ERR(tmp); if (!IS_ERR(tmp)) { fd = get_unused_fd_flags(flags); if (fd >= 0) { struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); } else { fsnotify_open(f->f_path.dentry); fd_install(fd, f); } } putname(tmp); } return fd; } --------------------------------------------------------------------- 第一个参数是多么的眼熟啊,查找路径名的do_path_lookup()函数的第一个参数也是它,用于说明查找路径名的基目录。关于它,前面路径名查找已有说明了。其余参数则是sys_open()将传进来的参数传给了它。 这个函数执行如下操作: 1、调用getname(filename)从进程地址空间读取文件的路径名,将路径名的地址存放在局部变量tmp。getname(filename)本质上,首先从名为"names_cache"路径名slab缓存中分配内存区,然后将路径名从用户空间复制到该内存区中。 2、调用get_unused_fd_flags (flags)从当前进程的文件描述符表中找一个空位。其定义为: --------------------------------------------------------------------- include/linux/file.h #define get_unused_fd_flags(flags) alloc_fd(0, (flags)) --------------------------------------------------------------------- 这是一个宏,仅仅是对alloc_fd()函数特殊参数下调用的别名,对alloc_fd()函数定义如下: --------------------------------------------------------------------- fs/file.c /* * allocate a file descriptor, mark it busy. 
*/ int alloc_fd(unsigned start, unsigned flags) { struct files_struct *files = current->files; unsigned int fd; int error; struct fdtable *fdt; spin_lock(&files->file_lock); repeat: fdt = files_fdtable(files); fd = start; if (fd < files->next_fd) fd = files->next_fd; if (fd < fdt->max_fds) fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, fd); error = expand_files(files, fd); if (error < 0) goto out; /* * If we needed to expand the fs array we * might have blocked - try again. */ if (error) goto repeat; if (start <= files->next_fd) files->next_fd = fd + 1; FD_SET(fd, fdt->open_fds); if (flags & O_CLOEXEC) FD_SET(fd, fdt->close_on_exec); else FD_CLR(fd, fdt->close_on_exec); error = fd; #if 1 /* Sanity check */ if (rcu_dereference_raw(fdt->fd[fd]) != NULL) { printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); rcu_assign_pointer(fdt->fd[fd], NULL); } #endif out: spin_unlock(&files->file_lock); return error; } --------------------------------------------------------------------- 这个函数执行如下操作: a.将current->files-> next_fd字段赋值给局部变量。 b.调用find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, fd)来在文件描述符表中寻找下一个可以分配的文件描述符。这个函数也是因体系结构而异,系统中通用的函数定义为: --------------------------------------------------------------------- lib/find_next_bit.c unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { const unsigned long *p = addr + BITOP_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG-1); unsigned long tmp; if (offset >= size) return size; size -= result; offset %= BITS_PER_LONG; if (offset) { tmp = *(p++); tmp |= ~0UL >> (BITS_PER_LONG - offset); if (size < BITS_PER_LONG) goto found_first; if (~tmp) goto found_middle; size -= BITS_PER_LONG; result += BITS_PER_LONG; } while (size & ~(BITS_PER_LONG-1)) { if (~(tmp = *(p++))) goto found_middle; result += BITS_PER_LONG; size -= BITS_PER_LONG; } if (!size) return result; tmp = *p; found_first: tmp |= ~0UL << size; if (tmp == ~0UL) /* Are any bits zero? 
*/ return result + size; /* Nope. */ found_middle: return result + ffz(tmp); } --------------------------------------------------------------------- (1)、上面的BITOP_WORD(offset)清一色定义为: #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) 首先求得第一个要查找的long的位置。 (2)、offset & ~(BITS_PER_LONG-1)等价于offset除以32再乘以32,以此来求得所要查找的第一个字第0位在表中的位置,只不过会比除法运算效率高很多。 (3)、如果传递的起始查找位置甚至大于最大可能值,则返回最大可能值。 (4)、查找第一个0位所在的long型值的位置。而局部变量tmp中会保存该long型量的值。 (5)、返回查找到的第一个0位的位置或可能的最大值。 c.调用expand_files(files, fd),来扩展文件描述符表,files_struct的fd_array数组成员可以在打开的文件较少时使用,但当打开的文件较多时,就会对文件描述符表进行扩展。 d.更新files->next_fd字段,将分配的文件描述符添加进fdt->open_fds,如果设置了O_CLOEXEC则将文件描述符添加进fdt->close_on_exec,若没有,则清除fdt->close_on_exec中的相应位。 e.返回文件描述符。 3、调用do_filp_open(dfd, tmp, flags, mode, 0)函数,传递给它的参数依次为查找路径名的基目录、文件路径名、访问模式标志以及许可权位掩码、访问模式位。这个函数定义为: --------------------------------------------------------------------- fs/namei.c 1761 /* 1762 * Note that the low bits of the passed in "open_flag" 1763 * are not the same as in the local variable "flag". See 1764 * open_to_namei_flags() for more details. 1765 */ 1766 struct file *do_filp_open(int dfd, const char *pathname, 1767 int open_flag, int mode, int acc_mode) 1768 { 1769 struct file *filp; 1770 struct nameidata nd; 1771 int error; 1772 struct path path; 1773 int count = 0; 1774 int flag = open_to_namei_flags(open_flag); 1775 int force_reval = 0; 1776 1777 if (!(open_flag & O_CREAT)) 1778 mode = 0; 1779 1780 /* 1781 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 1782 * check for O_DSYNC if the need any syncing at all we enforce it's 1783 * always set instead of having to deal with possibly weird behaviour 1784 * for malicious applications setting only __O_SYNC. 
1785 */ 1786 if (open_flag & __O_SYNC) 1787 open_flag |= O_DSYNC; 1788 1789 if (!acc_mode) 1790 acc_mode = MAY_OPEN | ACC_MODE(open_flag); 1791 1792 /* O_TRUNC implies we need access checks for write permissions */ 1793 if (open_flag & O_TRUNC) 1794 acc_mode |= MAY_WRITE; 1795 1796 /* Allow the LSM permission hook to distinguish append 1797 access from general write access. */ 1798 if (open_flag & O_APPEND) 1799 acc_mode |= MAY_APPEND; 1800 1801 /* find the parent */ 1802 reval: 1803 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 1804 if (error) 1805 return ERR_PTR(error); 1806 if (force_reval) 1807 nd.flags |= LOOKUP_REVAL; 1808 1809 current->total_link_count = 0; 1810 error = link_path_walk(pathname, &nd); 1811 if (error) { 1812 filp = ERR_PTR(error); 1813 goto out; 1814 } 1815 if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT)) 1816 audit_inode(pathname, nd.path.dentry); 1817 1818 /* 1819 * We have the parent and last component. 1820 */ 1821 1822 error = -ENFILE; 1823 filp = get_empty_filp(); 1824 if (filp == NULL) 1825 goto exit_parent; 1826 nd.intent.open.file = filp; 1827 filp->f_flags = open_flag; 1828 nd.intent.open.flags = flag; 1829 nd.intent.open.create_mode = mode; 1830 nd.flags &= ~LOOKUP_PARENT; 1831 nd.flags |= LOOKUP_OPEN; 1832 if (open_flag & O_CREAT) { 1833 nd.flags |= LOOKUP_CREATE; 1834 if (open_flag & O_EXCL) 1835 nd.flags |= LOOKUP_EXCL; 1836 } 1837 if (open_flag & O_DIRECTORY) 1838 nd.flags |= LOOKUP_DIRECTORY; 1839 if (!(open_flag & O_NOFOLLOW)) 1840 nd.flags |= LOOKUP_FOLLOW; 1841 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 1842 while (unlikely(!filp)) { /* trailing symlink */ 1843 struct path holder; 1844 struct inode *inode = path.dentry->d_inode; 1845 void *cookie; 1846 error = -ELOOP; 1847 /* S_ISDIR part is a temporary automount kludge */ 1848 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode)) 1849 goto exit_dput; 1850 if (count++ == 32) 1851 goto exit_dput; 1852 /* 1853 * This is subtle. 
Instead of calling do_follow_link() we do 1854 * the thing by hands. The reason is that this way we have zero 1855 * link_count and path_walk() (called from ->follow_link) 1856 * honoring LOOKUP_PARENT. After that we have the parent and 1857 * last component, i.e. we are in the same situation as after 1858 * the first path_walk(). Well, almost - if the last component 1859 * is normal we get its copy stored in nd->last.name and we will 1860 * have to putname() it when we are done. Procfs-like symlinks 1861 * just set LAST_BIND. 1862 */ 1863 nd.flags |= LOOKUP_PARENT; 1864 error = security_inode_follow_link(path.dentry, &nd); 1865 if (error) 1866 goto exit_dput; 1867 error = __do_follow_link(&path, &nd, &cookie); 1868 if (unlikely(error)) { 1869 /* nd.path had been dropped */ 1870 if (!IS_ERR(cookie) && inode->i_op->put_link) 1871 inode->i_op->put_link(path.dentry, &nd, cookie); 1872 path_put(&path); 1873 release_open_intent(&nd); 1874 filp = ERR_PTR(error); 1875 goto out; 1876 } 1877 holder = path; 1878 nd.flags &= ~LOOKUP_PARENT; 1879 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 1880 if (inode->i_op->put_link) 1881 inode->i_op->put_link(holder.dentry, &nd, cookie); 1882 path_put(&holder); 1883 } 1884 out: 1885 if (nd.root.mnt) 1886 path_put(&nd.root); 1887 if (filp == ERR_PTR(-ESTALE) && !force_reval) { 1888 force_reval = 1; 1889 goto reval; 1890 } 1891 return filp; 1892 1893 exit_dput: 1894 path_put_conditional(&path, &nd); 1895 if (!IS_ERR(nd.intent.open.file)) 1896 release_open_intent(&nd); 1897 exit_parent: 1898 path_put(&nd.path); 1899 filp = ERR_PTR(error); 1900 goto out; 1901 } --------------------------------------------------------------------- 这个函数依次执行下列步骤: a.把访问模式拷贝到flag标志中,但是,用特殊的格式对访问模式标志O_RDONLY、O_WRONLY和O_RDWR进行编码。由函数open_to_namei_flags()完成,其定义如下: --------------------------------------------------------------------- fs/namei.c /* * Note that while the flag value (low two bits) for sys_open means: * 00 - read-only * 01 - write-only 
* 10 - read-write * 11 - special * it is changed into * 00 - no permissions needed * 01 - read-permission * 10 - write-permission * 11 - read-write * for the internal routines (ie open_namei()/follow_link() etc) * This is more logical, and also allows the 00 "no perm needed" * to be used for symlinks (where the permissions are checked * later). * */ static inline int open_to_namei_flags(int flag) { if ((flag+1) & O_ACCMODE) flag++; return flag; } --------------------------------------------------------------------- 注释中说的已经很清楚了。 根据open_flag原来的值适当更新打开标志open_flag,根据open_flag适当更新访问模式acc_mode。 b.调用path_init(dfd, pathname, LOOKUP_PARENT, &nd),将查找的路径名的基路径找到,并赋给nd的path字段。注意在这个函数中设置了nd->flags的LOOKUP_PARENT,也就是要查找路径名最后一个分量的父目录。 c.设置current->total_link_count为0。 d.调用link_path_walk(pathname, &nd)查找路径名最后一个分量的父目录的路径path结构体,保存在nd.path中。因为,路径名的最后一个分量有可能是不存在的而需要创建。 e.调用get_empty_filp()函数从file的slab缓冲区filp_cachep中分配一个file结构,并初始化它的一些字段。如果返回值为NULL,则返回错误码-ENFILE的指针形式。 f.设置nd.intent.open.file为上一步分配的file结构的地址filp,设置filp的打开文件时所指定的标志f_flags为传递进来并经过适当修改的打开标志open_flag。设置nd.intent.open.flags为访问模式flag,设置nd.intent.open.create_mode为创建模式mode。清除nd.flags的LOOKUP_PARENT标志,设置其LOOKUP_OPEN标志。如果在打开标志中设置了O_CREAT,则设置nd.flags的LOOKUP_CREATE标志,若同时设置了打开标志的O_EXCL,则同时设置nd.flags的LOOKUP_EXCL。若设置了打开标志的O_DIRECTORY,则设置nd.flags的LOOKUP_DIRECTORY。若没有设置打开标志的O_NOFOLLOW位,则设置nd.flags的LOOKUP_FOLLOW。 即是根据打开标志来设置nd.flags的相应位。这些标志似乎都只与查找的路径名的最后一个分量有关。 g.调用do_last()来完成路径名最后一个分量的处理。又是一个非常长的函数,它接受六个参数,nd为前面查找的路径名最后一个分量的父目录的nameidata结构,path为一个path结构的局部变量,打开标志,acc_mode,创建模式(如果需要的话)mode和路径名。该函数定义如下: --------------------------------------------------------------------- fs/namei.c 1617 static struct file *do_last(struct nameidata *nd, struct path *path, 1618 int open_flag, int acc_mode, 1619 int mode, const char *pathname) 1620 { 1621 struct dentry *dir = nd->path.dentry; 1622 struct file *filp; 1623 int error = -EISDIR; 1624 1625 switch (nd->last_type) { 1626 case LAST_DOTDOT: 1627 follow_dotdot(nd); 1628 dir = nd->path.dentry; 1629 case 
LAST_DOT: 1630 if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { 1631 if (!dir->d_op->d_revalidate(dir, nd)) { 1632 error = -ESTALE; 1633 goto exit; 1634 } 1635 } 1636 /* fallthrough */ 1637 case LAST_ROOT: 1638 if (open_flag & O_CREAT) 1639 goto exit; 1640 /* fallthrough */ 1641 case LAST_BIND: 1642 audit_inode(pathname, dir); 1643 goto ok; 1644 } 1645 1646 /* trailing slashes? */ 1647 if (nd->last.name[nd->last.len]) { 1648 if (open_flag & O_CREAT) 1649 goto exit; 1650 nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW; 1651 } 1652 1653 /* just plain open? */ 1654 if (!(open_flag & O_CREAT)) { 1655 error = do_lookup(nd, &nd->last, path); 1656 if (error) 1657 goto exit; 1658 error = -ENOENT; 1659 if (!path->dentry->d_inode) 1660 goto exit_dput; 1661 if (path->dentry->d_inode->i_op->follow_link) 1662 return NULL; 1663 error = -ENOTDIR; 1664 if (nd->flags & LOOKUP_DIRECTORY) { 1665 if (!path->dentry->d_inode->i_op->lookup) 1666 goto exit_dput; 1667 } 1668 path_to_nameidata(path, nd); 1669 audit_inode(pathname, nd->path.dentry); 1670 goto ok; 1671 } 1672 1673 /* OK, it's O_CREAT */ 1674 mutex_lock(&dir->d_inode->i_mutex); 1675 1676 path->dentry = lookup_hash(nd); 1677 path->mnt = nd->path.mnt; 1678 1679 error = PTR_ERR(path->dentry); 1680 if (IS_ERR(path->dentry)) { 1681 mutex_unlock(&dir->d_inode->i_mutex); 1682 goto exit; 1683 } 1684 1685 if (IS_ERR(nd->intent.open.file)) { 1686 error = PTR_ERR(nd->intent.open.file); 1687 goto exit_mutex_unlock; 1688 } 1689 1690 /* Negative dentry, just create the file */ 1691 if (!path->dentry->d_inode) { 1692 /* 1693 * This write is needed to ensure that a 1694 * ro->rw transition does not occur between 1695 * the time when the file is created and when 1696 * a permanent write count is taken through 1697 * the 'struct file' in nameidata_to_filp(). 
1698 */ 1699 error = mnt_want_write(nd->path.mnt); 1700 if (error) 1701 goto exit_mutex_unlock; 1702 error = __open_namei_create(nd, path, open_flag, mode); 1703 if (error) { 1704 mnt_drop_write(nd->path.mnt); 1705 goto exit; 1706 } 1707 filp = nameidata_to_filp(nd); 1708 mnt_drop_write(nd->path.mnt); 1709 if (!IS_ERR(filp)) { 1710 error = ima_file_check(filp, acc_mode); 1711 if (error) { 1712 fput(filp); 1713 filp = ERR_PTR(error); 1714 } 1715 } 1716 return filp; 1717 } 1718 1719 /* 1720 * It already exists. 1721 */ 1722 mutex_unlock(&dir->d_inode->i_mutex); 1723 audit_inode(pathname, path->dentry); 1724 1725 error = -EEXIST; 1726 if (open_flag & O_EXCL) 1727 goto exit_dput; 1728 1729 if (__follow_mount(path)) { 1730 error = -ELOOP; 1731 if (open_flag & O_NOFOLLOW) 1732 goto exit_dput; 1733 } 1734 1735 error = -ENOENT; 1736 if (!path->dentry->d_inode) 1737 goto exit_dput; 1738 1739 if (path->dentry->d_inode->i_op->follow_link) 1740 return NULL; 1741 1742 path_to_nameidata(path, nd); 1743 error = -EISDIR; 1744 if (S_ISDIR(path->dentry->d_inode->i_mode)) 1745 goto exit; 1746 ok: 1747 filp = finish_open(nd, open_flag, acc_mode); 1748 return filp; 1749 1750 exit_mutex_unlock: 1751 mutex_unlock(&dir->d_inode->i_mutex); 1752 exit_dput: 1753 path_put_conditional(path, nd); 1754 exit: 1755 if (!IS_ERR(nd->intent.open.file)) 1756 release_open_intent(nd); 1757 path_put(&nd->path); 1758 return ERR_PTR(error); 1759 } --------------------------------------------------------------------- 再对这些参数进行以下说明,nd指向的nameidata的path中存放的是路径名最后一个分量的父目录的路径path,其last字段中存放的是路径名最后一个分量的名字的信息qstr结构 (1)这个函数首先根据路径名的最后一个分量的名字信息,来采取一些动作。 如果最后一个分量是“..”则调用follow_dotdot(nd)返回上一级目录,并设置局部变量dir为nd->path.dentry。 若最后一个分量是“.”,则检查nd->path.mnt->mnt_sb->s_type->fs_flags的FS_REVAL_DOT,若设置了该标志,则调用目录项的dir->d_op->d_revalidate(dir, nd)方法,若该方法失败,则释放先前分配的file结构,减少nd->path的引用计数,并返回错误码-ESTALE。 若最后一个分量为根目录。若设置了打开标志为O_CREAT,则释放先前分配的file结构,减少nd->path的引用计数,并返回错误码-EISDIR。 
若最后一个分量的类型为LAST_BIND(procfs类的符号链接会设置这种类型),则在记录审计信息之后调用finish_open()来完成最后的打开文件操作,并返回file结构指针filp。finish_open()函数稍后解释。 这一步中处理那些最后一个分量的路径已经获得并保存在nd->path中或者最后一个分量为符号链接的情况。同时我们也可以看到,是可以直接使用open来打开目录的,但是不能创建目录。 (2)、若路径名的最后一个分量是以“/”结尾(这根据nd->last.name[nd->last.len]的值来判断,在link_path_walk()函数中求出nd->last值的相关部分可以看出,若以“/”结尾,则nd->last.name[nd->last.len]的值正是字符‘/’)的,则检查打开标志是否设置了O_CREAT,若是则释放先前分配的file结构,减少nd->path的引用计数,并返回错误码-EISDIR;若没有,则设置nd查找标志nd->flags的LOOKUP_DIRECTORY 和LOOKUP_FOLLOW位。 (3)、若打开标志没有设置O_CREAT。则调用do_lookup(nd, &nd->last, path)来完成路径名中最后一个分量路径的查找。 若返回错误码,则释放先前分配的file结构,减少nd->path的引用计数,并返回该错误码。 若查找的结果path->dentry->d_inode为NULL,则调用path_put_conditional(path, nd)来释放查找到的path->dentry,若最后一个分量表示的是挂载点则还要释放path->mnt。释放先前分配的file结构,减少nd->path的引用计数,并返回错误码-ENOENT。 若最后一个分量表示的是符号链接,则返回NULL。 若设置了查找标志的LOOKUP_DIRECTORY位,则还要判断找到的是否为一个目录(通过检查path->dentry->d_inode->i_op->lookup),若不是目录,则调用path_put_conditional(path, nd)来释放查找到的path->dentry,若最后一个分量表示的是挂载点则还要释放path->mnt。释放先前分配的file结构,减少nd->path的引用计数,并返回错误码-ENOTDIR。 调用path_to_nameidata(path, nd)将使得nd->path中保存有路径名最后一个分量的路径。完成审计信息记录。 调用finish_open()来完成最后的打开文件操作,并返回file结构指针filp。finish_open()函数稍后解释。 (4)、打开标志设置了O_CREAT,若文件不存在则要创建的情况。首先要对父目录的inode上锁(mutex_lock(&dir->d_inode->i_mutex)),调用lookup_hash(nd)在目录项缓存中查找或者创建路径名最后一个分量的目录项。其定义为: --------------------------------------------------------------------- fs/namei.c 1122 static struct dentry *__lookup_hash(struct qstr *name, 1123 struct dentry *base, struct nameidata *nd) 1124 { 1125 struct dentry *dentry; 1126 struct inode *inode; 1127 int err; 1128 1129 inode = base->d_inode; 1130 1131 /* 1132 * See if the low-level filesystem might want 1133 * to use its own hash.. 
1134 */ 1135 if (base->d_op && base->d_op->d_hash) { 1136 err = base->d_op->d_hash(base, name); 1137 dentry = ERR_PTR(err); 1138 if (err < 0) 1139 goto out; 1140 } 1141 1142 dentry = __d_lookup(base, name); 1143 1144 /* lockess __d_lookup may fail due to concurrent d_move() 1145 * in some unrelated directory, so try with d_lookup 1146 */ 1147 if (!dentry) 1148 dentry = d_lookup(base, name); 1149 1150 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 1151 dentry = do_revalidate(dentry, nd); 1152 1153 if (!dentry) { 1154 struct dentry *new; 1155 1156 /* Don't create child dentry for a dead directory. */ 1157 dentry = ERR_PTR(-ENOENT); 1158 if (IS_DEADDIR(inode)) 1159 goto out; 1160 1161 new = d_alloc(base, name); 1162 dentry = ERR_PTR(-ENOMEM); 1163 if (!new) 1164 goto out; 1165 dentry = inode->i_op->lookup(inode, new, nd); 1166 if (!dentry) 1167 dentry = new; 1168 else 1169 dput(new); 1170 } 1171 out: 1172 return dentry; 1173 } 1175 /* 1176 * Restricted form of lookup. Doesn't follow links, single-component only, 1177 * needs parent already locked. Doesn't follow mounts. 1178 * SMP-safe. 
1179 */ 1180 static struct dentry *lookup_hash(struct nameidata *nd) 1181 { 1182 int err; 1183 1184 err = exec_permission(nd->path.dentry->d_inode); 1185 if (err) 1186 return ERR_PTR(err); 1187 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1188 } --------------------------------------------------------------------- lookup_hash(nd)函数当在目录项缓存中没有找到要找的目录项时,会分配目录项,并且会调用父目录的inode->i_op->lookup(inode, new, nd)方法来创建所要查找的文件的inode等信息,并设置目录项的适当字段。但是在目录中没有所要查找的文件时,lookup(inode, new, nd)方法并不返回错误。 lookup_hash(nd)函数返回的结果被赋给path->dentry。path用来存放路径名最后一个分量的path结构。 初始化path->mnt为父目录的vfsmount对象。 (5)、检查path->dentry是否是一个错误码的指针形式,若是则对父目录的inode解锁(mutex_unlock(&dir->d_inode->i_mutex)),释放先前分配的file结构,减少nd->path的引用计数,并返回该错误码。 (6)、检查nd->intent.open.file是否包含一个错误码,若是则首先对父目录的inode解锁,接着调用path_put_conditional(path, nd)来释放由lookup_hash(nd)查找到或创建的path->dentry,若最后一个分量表示的是挂载点则还要释放path->mnt。由于此时nd->intent.open.file本身是一个错误指针,所以不再调用release_open_intent();减少nd->path的引用计数,并返回nd->intent.open.file中保存的那个错误码。 (7)、检查path->dentry->d_inode是否为NULL,若是,则说明要创建一个文件。 首先调用mnt_want_write(nd->path.mnt),来确保在创建文件和在nameidata_to_filp()中通过struct file取得固定的写计数之间不会发生ro -> rw的转换。这个函数本质上增加vfsmount对象的写者计数器mnt->mnt_writers。 调用__open_namei_create(nd, path, open_flag, mode)函数来创建一个文件,这个函数定义为: --------------------------------------------------------------------- fs/namei.c 1502 /* 1503 * Be careful about ever adding any more callers of this 1504 * function. Its flags must be in the namei format, not 1505 * what get passed to sys_open(). 
1506 */ 1507 static int __open_namei_create(struct nameidata *nd, struct path *path, 1508 int open_flag, int mode) 1509 { 1510 int error; 1511 struct dentry *dir = nd->path.dentry; 1512 1513 if (!IS_POSIXACL(dir->d_inode)) 1514 mode &= ~current_umask(); 1515 error = security_path_mknod(&nd->path, path->dentry, mode, 0); 1516 if (error) 1517 goto out_unlock; 1518 error = vfs_create(dir->d_inode, path->dentry, mode, nd); 1519 out_unlock: 1520 mutex_unlock(&dir->d_inode->i_mutex); 1521 dput(nd->path.dentry); 1522 nd->path.dentry = path->dentry; 1523 if (error) 1524 return error; 1525 /* Don't check for write permission, don't truncate */ 1526 return may_open(&nd->path, 0, open_flag & ~O_TRUNC); 1527 } --------------------------------------------------------------------- __open_namei_create()函数在执行了访问权限检查后,就调用父目录inode的create方法dir->i_op->create(dir, dentry, mode, nd)来创建文件。之后,__open_namei_create()解除对于父目录inode的锁定,释放父目录目录项,并将路径名最后一个分量目录项path->dentry赋给nd->path.dentry。若前面的步骤出错,则返回该错误码;否则返回对may_open(&nd->path, 0, open_flag & ~O_TRUNC)调用的返回值。 调用nameidata_to_filp(nd)来将一个nameidata转换为一个打开的filp,这个函数本质上主要调用__dentry_open(nd->path.dentry, nd->path.mnt, filp, NULL, cred)来根据当前进程的状态和获得的目录项来设置nd->intent.open.file所指向的file结构的各字段。 调用mnt_drop_write(nd->path.mnt)来减少nd->path.mnt写者计数器的值。 若filp不是错误指针,则调用ima_file_check(filp, acc_mode)进行检查,检查失败时调用fput(filp)释放该file结构并返回相应的错误码。 返回filp。 (8)、尽管设置了打开标志的O_CREAT,但是却找到了所需的文件。则首先对父目录inode解锁。检查打开标志是否设置了O_EXCL,若是,则调用path_put_conditional(path, nd)来释放查找到的path->dentry,若最后一个分量表示的是挂载点则还要释放path->mnt。释放先前分配的file结构,减少nd->path的引用计数,并返回错误码-EEXIST。 调用__follow_mount(path) 找到挂载在本路径上的文件系统,即vfsmount对象的地址和目录项对象地址;若该函数返回非零值且打开标志设置了O_NOFOLLOW,则返回错误码-ELOOP。 若最后一个分量是符号链接,则返回NULL,交由do_filp_open()去追踪;若最后一个分量是目录,则返回错误码-EISDIR。 调用finish_open()来完成打开操作。 (9)、返回filp。 h.若最后一个分量是一个符号链接,则追踪符号链接。 i.返回filp。 4、将文件安装在fd数组中。 5、释放路径名所占用的临时内存空间tmp。 6、返回文件描述符fd。 |
|