diff options
Diffstat (limited to 'fs/overlayfs/inode.c')
-rw-r--r-- | fs/overlayfs/inode.c | 139 |
1 files changed, 97 insertions, 42 deletions
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 79e8994e3bc1..8be6cd264f66 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -10,6 +10,7 @@ #include <linux/xattr.h> #include <linux/posix_acl.h> #include <linux/ratelimit.h> +#include <linux/fiemap.h> #include "overlayfs.h" @@ -58,6 +59,24 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) attr->ia_valid &= ~ATTR_MODE; + /* + * We might have to translate ovl file into real file object + * once use cases emerge. For now, simply don't let underlying + * filesystem rely on attr->ia_file + */ + attr->ia_valid &= ~ATTR_FILE; + + /* + * If open(O_TRUNC) is done, VFS calls ->setattr with ATTR_OPEN + * set. Overlayfs does not pass O_TRUNC flag to underlying + * filesystem during open -> do not pass ATTR_OPEN. This + * disables optimization in fuse which assumes open(O_TRUNC) + * already set file size to 0. But we never passed O_TRUNC to + * fuse. So by clearing ATTR_OPEN, fuse will be forced to send + * setattr request to server. + */ + attr->ia_valid &= ~ATTR_OPEN; + inode_lock(upperdentry->d_inode); old_cred = ovl_override_creds(dentry->d_sb); err = notify_change(upperdentry, attr, NULL); @@ -79,6 +98,7 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) { bool samefs = ovl_same_fs(dentry->d_sb); unsigned int xinobits = ovl_xino_bits(dentry->d_sb); + unsigned int xinoshift = 64 - xinobits; if (samefs) { /* @@ -89,22 +109,22 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) stat->dev = dentry->d_sb->s_dev; return 0; } else if (xinobits) { - unsigned int shift = 64 - xinobits; /* * All inode numbers of underlying fs should not be using the * high xinobits, so we use high xinobits to partition the * overlay st_ino address space. The high bits holds the fsid - * (upper fsid is 0). This way overlay inode numbers are unique - * and all inodes use overlay st_dev. Inode numbers are also - * persistent for a given layer configuration. + * (upper fsid is 0). The lowest xinobit is reserved for mapping + * the non-peresistent inode numbers range in case of overflow. + * This way all overlay inode numbers are unique and use the + * overlay st_dev. */ - if (stat->ino >> shift) { - pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n", - dentry, stat->ino, xinobits); - } else { - stat->ino |= ((u64)fsid) << shift; + if (likely(!(stat->ino >> xinoshift))) { + stat->ino |= ((u64)fsid) << (xinoshift + 1); stat->dev = dentry->d_sb->s_dev; return 0; + } else if (ovl_xino_warn(dentry->d_sb)) { + pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n", + dentry, stat->ino, xinobits); } } @@ -437,7 +457,7 @@ int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags) if (flags & S_ATIME) { struct ovl_fs *ofs = inode->i_sb->s_fs_info; struct path upperpath = { - .mnt = ofs->upper_mnt, + .mnt = ovl_upper_mnt(ofs), .dentry = ovl_upperdentry_dereference(OVL_I(inode)), }; @@ -460,10 +480,6 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, return -EOPNOTSUPP; old_cred = ovl_override_creds(inode->i_sb); - - if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) - filemap_write_and_wait(realinode->i_mapping); - err = realinode->i_op->fiemap(realinode, fieinfo, start, len); revert_creds(old_cred); @@ -504,7 +520,7 @@ static const struct address_space_operations ovl_aops = { /* * It is possible to stack overlayfs instance on top of another - * overlayfs instance as lower layer. We need to annonate the + * overlayfs instance as lower layer. We need to annotate the * stackable i_mutex locks according to stack level of the super * block instance. An overlayfs instance can never be in stack * depth 0 (there is always a real fs below it). An overlayfs @@ -561,27 +577,73 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) #endif } -static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, - unsigned long ino, int fsid) +static void ovl_next_ino(struct inode *inode) +{ + struct ovl_fs *ofs = inode->i_sb->s_fs_info; + + inode->i_ino = atomic_long_inc_return(&ofs->last_ino); + if (unlikely(!inode->i_ino)) + inode->i_ino = atomic_long_inc_return(&ofs->last_ino); +} + +static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid) { int xinobits = ovl_xino_bits(inode->i_sb); + unsigned int xinoshift = 64 - xinobits; /* * When d_ino is consistent with st_ino (samefs or i_ino has enough * bits to encode layer), set the same value used for st_ino to i_ino, * so inode number exposed via /proc/locks and a like will be * consistent with d_ino and st_ino values. An i_ino value inconsistent - * with d_ino also causes nfsd readdirplus to fail. When called from - * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real - * upper inode i_ino on ovl_inode_init() or ovl_inode_update(). + * with d_ino also causes nfsd readdirplus to fail. */ - if (ovl_same_dev(inode->i_sb)) { - inode->i_ino = ino; - if (xinobits && fsid && !(ino >> (64 - xinobits))) - inode->i_ino |= (unsigned long)fsid << (64 - xinobits); - } else { - inode->i_ino = get_next_ino(); + inode->i_ino = ino; + if (ovl_same_fs(inode->i_sb)) { + return; + } else if (xinobits && likely(!(ino >> xinoshift))) { + inode->i_ino |= (unsigned long)fsid << (xinoshift + 1); + return; } + + /* + * For directory inodes on non-samefs with xino disabled or xino + * overflow, we allocate a non-persistent inode number, to be used for + * resolving st_ino collisions in ovl_map_dev_ino(). + * + * To avoid ino collision with legitimate xino values from upper + * layer (fsid 0), use the lowest xinobit to map the non + * persistent inode numbers to the unified st_ino address space. + */ + if (S_ISDIR(inode->i_mode)) { + ovl_next_ino(inode); + if (xinobits) { + inode->i_ino &= ~0UL >> xinobits; + inode->i_ino |= 1UL << xinoshift; + } + } +} + +void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip, + unsigned long ino, int fsid) +{ + struct inode *realinode; + + if (oip->upperdentry) + OVL_I(inode)->__upperdentry = oip->upperdentry; + if (oip->lowerpath && oip->lowerpath->dentry) + OVL_I(inode)->lower = igrab(d_inode(oip->lowerpath->dentry)); + if (oip->lowerdata) + OVL_I(inode)->lowerdata = igrab(d_inode(oip->lowerdata)); + + realinode = ovl_inode_real(inode); + ovl_copyattr(realinode, inode); + ovl_copyflags(realinode, inode); + ovl_map_ino(inode, ino, fsid); +} + +static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) +{ inode->i_mode = mode; inode->i_flags |= S_NOCMTIME; #ifdef CONFIG_FS_POSIX_ACL @@ -719,7 +781,7 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) inode = new_inode(sb); if (inode) - ovl_fill_inode(inode, mode, rdev, 0, 0); + ovl_fill_inode(inode, mode, rdev); return inode; } @@ -843,7 +905,7 @@ struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir) * Does overlay inode need to be hashed by lower inode? */ static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, - struct dentry *lower, struct dentry *index) + struct dentry *lower, bool index) { struct ovl_fs *ofs = sb->s_fs_info; @@ -856,7 +918,7 @@ static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, return true; /* Yes, if won't be copied up */ - if (!ofs->upper_mnt) + if (!ovl_upper_mnt(ofs)) return true; /* No, if lower hardlink is or will be broken on copy up */ @@ -891,8 +953,8 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL; bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, oip->index); - int fsid = bylower ? oip->lowerpath->layer->fsid : 0; - bool is_dir, metacopy = false; + int fsid = bylower ? lowerpath->layer->fsid : 0; + bool is_dir; unsigned long ino = 0; int err = oip->newinode ? -EEXIST : -ENOMEM; @@ -941,9 +1003,11 @@ struct inode *ovl_get_inode(struct super_block *sb, err = -ENOMEM; goto out_err; } + ino = realinode->i_ino; + fsid = lowerpath->layer->fsid; } - ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid); - ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata); + ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); + ovl_inode_init(inode, oip, ino, fsid); if (upperdentry && ovl_is_impuredir(upperdentry)) ovl_set_flag(OVL_IMPURE, inode); @@ -951,15 +1015,6 @@ struct inode *ovl_get_inode(struct super_block *sb, if (oip->index) ovl_set_flag(OVL_INDEX, inode); - if (upperdentry) { - err = ovl_check_metacopy_xattr(upperdentry); - if (err < 0) - goto out_err; - metacopy = err; - if (!metacopy) - ovl_set_flag(OVL_UPPERDATA, inode); - } - OVL_I(inode)->redirect = oip->redirect; if (bylower) |