diff options
Diffstat (limited to 'fs/overlayfs/inode.c')
-rw-r--r-- | fs/overlayfs/inode.c | 188 |
1 files changed, 121 insertions, 67 deletions
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 3b1bd469accd..6e3815fb006b 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -16,13 +16,6 @@ #include "overlayfs.h" -static dev_t ovl_get_pseudo_dev(struct dentry *dentry) -{ - struct ovl_entry *oe = dentry->d_fsdata; - - return oe->lowerstack[0].layer->pseudo_dev; -} - int ovl_setattr(struct dentry *dentry, struct iattr *attr) { int err; @@ -66,6 +59,69 @@ out: return err; } +static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, + struct ovl_layer *lower_layer) +{ + bool samefs = ovl_same_sb(dentry->d_sb); + unsigned int xinobits = ovl_xino_bits(dentry->d_sb); + + if (samefs) { + /* + * When all layers are on the same fs, all real inode + * number are unique, so we use the overlay st_dev, + * which is friendly to du -x. + */ + stat->dev = dentry->d_sb->s_dev; + return 0; + } else if (xinobits) { + unsigned int shift = 64 - xinobits; + /* + * All inode numbers of underlying fs should not be using the + * high xinobits, so we use high xinobits to partition the + * overlay st_ino address space. The high bits holds the fsid + * (upper fsid is 0). This way overlay inode numbers are unique + * and all inodes use overlay st_dev. Inode numbers are also + * persistent for a given layer configuration. + */ + if (stat->ino >> shift) { + pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n", + dentry, stat->ino, xinobits); + } else { + if (lower_layer) + stat->ino |= ((u64)lower_layer->fsid) << shift; + + stat->dev = dentry->d_sb->s_dev; + return 0; + } + } + + /* The inode could not be mapped to a unified st_ino address space */ + if (S_ISDIR(dentry->d_inode->i_mode)) { + /* + * Always use the overlay st_dev for directories, so 'find + * -xdev' will scan the entire overlay mount and won't cross the + * overlay mount boundaries. + * + * If not all layers are on the same fs the pair {real st_ino; + * overlay st_dev} is not unique, so use the non persistent + * overlay st_ino for directories. + */ + stat->dev = dentry->d_sb->s_dev; + stat->ino = dentry->d_inode->i_ino; + } else if (lower_layer && lower_layer->fsid) { + /* + * For non-samefs setup, if we cannot map all layers st_ino + * to a unified address space, we need to make sure that st_dev + * is unique per lower fs. Upper layer uses real st_dev and + * lower layers use the unique anonymous bdev assigned to the + * lower fs. + */ + stat->dev = lower_layer->fs->pseudo_dev; + } + + return 0; +} + int ovl_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { @@ -75,6 +131,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat, const struct cred *old_cred; bool is_dir = S_ISDIR(dentry->d_inode->i_mode); bool samefs = ovl_same_sb(dentry->d_sb); + struct ovl_layer *lower_layer = NULL; int err; type = ovl_path_real(dentry, &realpath); @@ -84,14 +141,18 @@ int ovl_getattr(const struct path *path, struct kstat *stat, goto out; /* - * For non-dir or same fs, we use st_ino of the copy up origin, if we - * know it. This guaranties constant st_dev/st_ino across copy up. + * For non-dir or same fs, we use st_ino of the copy up origin. + * This guaranties constant st_dev/st_ino across copy up. + * With xino feature and non-samefs, we use st_ino of the copy up + * origin masked with high bits that represent the layer id. * - * If filesystem supports NFS export ops, this also guaranties + * If lower filesystem supports NFS file handles, this also guaranties * persistent st_ino across mount cycle. */ - if (!is_dir || samefs) { - if (OVL_TYPE_ORIGIN(type)) { + if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) { + if (!OVL_TYPE_UPPER(type)) { + lower_layer = ovl_layer_lower(dentry); + } else if (OVL_TYPE_ORIGIN(type)) { struct kstat lowerstat; u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0); @@ -118,43 +179,17 @@ int ovl_getattr(const struct path *path, struct kstat *stat, */ if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) || (!ovl_verify_lower(dentry->d_sb) && - (is_dir || lowerstat.nlink == 1))) + (is_dir || lowerstat.nlink == 1))) { stat->ino = lowerstat.ino; - - if (samefs) - WARN_ON_ONCE(stat->dev != lowerstat.dev); - else - stat->dev = ovl_get_pseudo_dev(dentry); - } - if (samefs) { - /* - * When all layers are on the same fs, all real inode - * number are unique, so we use the overlay st_dev, - * which is friendly to du -x. - */ - stat->dev = dentry->d_sb->s_dev; - } else if (!OVL_TYPE_UPPER(type)) { - /* - * For non-samefs setup, to make sure that st_dev/st_ino - * pair is unique across the system, we use a unique - * anonymous st_dev for lower layer inode. - */ - stat->dev = ovl_get_pseudo_dev(dentry); + lower_layer = ovl_layer_lower(dentry); + } } - } else { - /* - * Always use the overlay st_dev for directories, so 'find - * -xdev' will scan the entire overlay mount and won't cross the - * overlay mount boundaries. - * - * If not all layers are on the same fs the pair {real st_ino; - * overlay st_dev} is not unique, so use the non persistent - * overlay st_ino for directories. - */ - stat->dev = dentry->d_sb->s_dev; - stat->ino = dentry->d_inode->i_ino; } + err = ovl_map_dev_ino(dentry, stat, lower_layer); + if (err) + goto out; + /* * It's probably not worth it to count subdirs to get the * correct link count. nlink=1 seems to pacify 'find' and @@ -383,24 +418,18 @@ int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags) int ovl_update_time(struct inode *inode, struct timespec *ts, int flags) { - struct dentry *alias; - struct path upperpath; - - if (!(flags & S_ATIME)) - return 0; - - alias = d_find_any_alias(inode); - if (!alias) - return 0; - - ovl_path_upper(alias, &upperpath); - if (upperpath.dentry) { - touch_atime(&upperpath); - inode->i_atime = d_inode(upperpath.dentry)->i_atime; + if (flags & S_ATIME) { + struct ovl_fs *ofs = inode->i_sb->s_fs_info; + struct path upperpath = { + .mnt = ofs->upper_mnt, + .dentry = ovl_upperdentry_dereference(OVL_I(inode)), + }; + + if (upperpath.dentry) { + touch_atime(&upperpath); + inode->i_atime = d_inode(upperpath.dentry)->i_atime; + } } - - dput(alias); - return 0; } @@ -459,9 +488,27 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) #endif } -static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) +static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, + unsigned long ino, int fsid) { - inode->i_ino = get_next_ino(); + int xinobits = ovl_xino_bits(inode->i_sb); + + /* + * When NFS export is enabled and d_ino is consistent with st_ino + * (samefs or i_ino has enough bits to encode layer), set the same + * value used for d_ino to i_ino, because nfsd readdirplus compares + * d_ino values to i_ino values of child entries. When called from + * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real + * upper inode i_ino on ovl_inode_init() or ovl_inode_update(). + */ + if (inode->i_sb->s_export_op && + (ovl_same_sb(inode->i_sb) || xinobits)) { + inode->i_ino = ino; + if (xinobits && fsid && !(ino >> (64 - xinobits))) + inode->i_ino |= (unsigned long)fsid << (64 - xinobits); + } else { + inode->i_ino = get_next_ino(); + } inode->i_mode = mode; inode->i_flags |= S_NOCMTIME; #ifdef CONFIG_FS_POSIX_ACL @@ -597,7 +644,7 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) inode = new_inode(sb); if (inode) - ovl_fill_inode(inode, mode, rdev); + ovl_fill_inode(inode, mode, rdev, 0, 0); return inode; } @@ -703,13 +750,16 @@ static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, } struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, - struct dentry *lowerdentry, struct dentry *index, + struct ovl_path *lowerpath, struct dentry *index, unsigned int numlower) { struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; + struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL; bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, index); + int fsid = bylower ? lowerpath->layer->fsid : 0; bool is_dir; + unsigned long ino = 0; if (!realinode) realinode = d_inode(lowerdentry); @@ -748,18 +798,22 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry, if (!is_dir) nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); set_nlink(inode, nlink); + ino = key->i_ino; } else { /* Lower hardlink that will be broken on copy up */ inode = new_inode(sb); if (!inode) goto out_nomem; } - ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); + ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid); ovl_inode_init(inode, upperdentry, lowerdentry); if (upperdentry && ovl_is_impuredir(upperdentry)) ovl_set_flag(OVL_IMPURE, inode); + if (index) + ovl_set_flag(OVL_INDEX, inode); + /* Check for non-merge dir that may have whiteouts */ if (is_dir) { if (((upperdentry && lowerdentry) || numlower > 1) || |